]> git.saurik.com Git - apple/cf.git/blob - CFStringUtilities.c
CF-550.tar.gz
[apple/cf.git] / CFStringUtilities.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFStringUtilities.c
24 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFStringEncodingConverterExt.h>
30 #include <CoreFoundation/CFUniChar.h>
31 #include <CoreFoundation/CFStringEncodingExt.h>
32 #include "CFStringEncodingDatabase.h"
33 #include "CFICUConverters.h"
34 #include <CoreFoundation/CFPreferences.h>
35 #include <limits.h>
36 #include <stdlib.h>
37 #include <unicode/ucol.h>
38 #include <unicode/ucoleitr.h>
39 #include <string.h>
40
41 #if DEPLOYMENT_TARGET_WINDOWS
42 #include <tchar.h>
43 #endif
44
45
46 Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
47 switch (theEncoding) {
48 case kCFStringEncodingASCII: // Built-in encodings
49 case kCFStringEncodingMacRoman:
50 case kCFStringEncodingUTF8:
51 case kCFStringEncodingNonLossyASCII:
52 case kCFStringEncodingWindowsLatin1:
53 case kCFStringEncodingNextStepLatin:
54 case kCFStringEncodingUTF16:
55 case kCFStringEncodingUTF16BE:
56 case kCFStringEncodingUTF16LE:
57 case kCFStringEncodingUTF32:
58 case kCFStringEncodingUTF32BE:
59 case kCFStringEncodingUTF32LE:
60 return true;
61
62 default:
63 return CFStringEncodingIsValidEncoding(theEncoding);
64 }
65 }
66
67 const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
68 return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings();
69 }
70
71 CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
72 static CFMutableDictionaryRef mappingTable = NULL;
73 CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding) : NULL;
74
75 if (!theName) {
76 const char *encodingName = __CFStringEncodingGetName(theEncoding);
77
78 if (encodingName) {
79 theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII);
80 }
81
82 if (theName) {
83 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
84
85 CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName);
86 CFRelease(theName);
87 }
88 }
89
90 return theName;
91 }
92
93 CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
94 CFStringEncoding encoding = kCFStringEncodingInvalidId;
95 #define BUFFER_SIZE (100)
96 char buffer[BUFFER_SIZE];
97 const char *name = CFStringGetCStringPtr(charsetName, __CFStringGetEightBitStringEncoding());
98
99 if (NULL == name) {
100 if (false == CFStringGetCString(charsetName, buffer, BUFFER_SIZE, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId;
101
102 name = buffer;
103 }
104
105 encoding = __CFStringEncodingGetFromCanonicalName(name);
106
107 if (kCFStringEncodingInvalidId == encoding) encoding = __CFStringEncodingGetFromICUName(name);
108
109
110 return encoding;
111 }
112
113 CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
114 CFStringRef name = NULL;
115 CFIndex value = encoding;
116 static CFMutableDictionaryRef mappingTable = NULL;
117 static CFSpinLock_t lock = CFSpinLockInit;
118
119 __CFSpinLock(&lock);
120 name = ((NULL == mappingTable) ? NULL : (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)value));
121
122 if (NULL == name) {
123 #define STACK_BUFFER_SIZE (100)
124 char buffer[STACK_BUFFER_SIZE];
125
126 if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) name = CFStringCreateWithCString(NULL, buffer, kCFStringEncodingASCII);
127
128
129 if (NULL != name) {
130 CFIndex value = encoding;
131
132 if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
133
134 CFDictionaryAddValue(mappingTable, (const void*)value, (const void*)name);
135 CFRelease(name);
136 }
137 }
138 __CFSpinUnlock(&lock);
139
140 return name;
141 }
142
// File-local copies of the NS*StringEncoding values that
// CFStringConvertEncodingToNSStringEncoding() and
// CFStringConvertNSStringEncodingToEncoding() translate to and from.
// NOTE(review): presumably these mirror Foundation's NSStringEncoding
// constants -- confirm against the Foundation headers before changing any value.
// Values 1..15 are contiguous and are used as (value - 1) indexes into the
// lookup table in CFStringConvertNSStringEncodingToEncoding().
enum {
    NSASCIIStringEncoding = 1, /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13, /* Greek */
    NSWindowsCP1254StringEncoding = 14, /* Turkish */
    NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
    NSISO2022JPStringEncoding = 21, /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
};
164
// Marker bit (bit 31) that CFStringConvertEncodingToNSStringEncoding() ORs into
// a CFStringEncoding that has no dedicated NS constant, and that
// CFStringConvertNSStringEncodingToEncoding() tests for and strips again.
// The constant is unsigned: shifting a signed 1 into the sign bit is not
// well-defined, and a negative int would be sign-extended to
// 0xFFFFFFFF80000000 when combined with a 64-bit unsigned long, making any
// value with bits 32..63 set look like a marked encoding.
#define NSENCODING_MASK (1U << 31)
166
167 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
168 switch (theEncoding & 0xFFF) {
169 case kCFStringEncodingUnicode:
170 if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
171 else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
172 break;
173
174 case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
175 case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
176
177 case kCFStringEncodingASCII: return NSASCIIStringEncoding;
178
179 case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
180 case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
181 case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
182 case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
183 case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
184 case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
185
186 case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
187 case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
188 case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
189 case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
190 case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
191 case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
192 }
193
194 return NSENCODING_MASK | theEncoding;
195 }
196
197 CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) {
198 const uint16_t encodings[] = {
199 kCFStringEncodingASCII,
200 kCFStringEncodingNextStepLatin,
201 kCFStringEncodingEUC_JP,
202 0,
203 kCFStringEncodingISOLatin1,
204 kCFStringEncodingMacSymbol,
205 kCFStringEncodingNonLossyASCII,
206 kCFStringEncodingDOSJapanese,
207 kCFStringEncodingISOLatin2,
208 kCFStringEncodingUTF16,
209 kCFStringEncodingWindowsCyrillic,
210 kCFStringEncodingWindowsLatin1,
211 kCFStringEncodingWindowsGreek,
212 kCFStringEncodingWindowsLatin5,
213 kCFStringEncodingWindowsLatin2
214 };
215
216 if (NSUTF8StringEncoding == theEncoding) return kCFStringEncodingUTF8;
217
218 if ((theEncoding > 0) && (theEncoding <= NSWindowsCP1250StringEncoding)) return encodings[theEncoding - 1];
219
220 switch (theEncoding) {
221 case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
222 case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP;
223
224 default:
225 return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
226 }
227 }
228
229 UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
230 uint16_t codepage = __CFStringEncodingGetWindowsCodePage(theEncoding);
231
232 return ((0 == codepage) ? kCFStringEncodingInvalidId : codepage);
233 }
234
235 CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
236 return __CFStringEncodingGetFromWindowsCodePage(theEncoding);
237 }
238
239 CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
240 CFStringEncoding macEncoding = __CFStringEncodingGetMostCompatibleMacScript(encoding);
241
242
243 return macEncoding;
244 }
245
246 #define kCFStringCompareAllocationIncrement (128)
247
248
249 // -------------------------------------------------------------------------------------------------
250 // CompareSpecials - ignore case & diacritic differences
251 //
252 // Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
253 // Fullwidth & halfwidth are in range FF00-FFEF
254 // Parenthesized & circled are in range 3200-32FF
255 // -------------------------------------------------------------------------------------------------
256
// Constants used by __CompareSpecials() to pick apart the 32-bit collation
// element orders returned by ucol_next().
enum {
    // Tertiary values in [kUpperCaseWeightMin, kUpperCaseWeightMax] are treated
    // as upper case and have kUpperToLowerDelta subtracted before comparison.
    kUpperCaseWeightMin = 0x80 | 0x0F,
    kUpperCaseWeightMax = 0x80 | 0x17,
    kUpperToLowerDelta = 0x80 | 0x0A, // 0x0A = 0x0F - 0x05
    // Masks selecting the individual weight fields of an element order.
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly = 0xFFFF0000,
    kMaskSecondaryOnly = 0x0000FF00,
    kMaskCaseTertiary = 0x000000FF // 2 hi bits case, 6 lo bits tertiary
};
266
267 static SInt32 __CompareSpecials(const UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length) {
268 UErrorCode icuStatus = U_ZERO_ERROR;
269 SInt32 orderWidth = 0;
270 SInt32 orderCompos = 0;
271
272 UCollationElements * collElems1 = ucol_openElements(collator, (const UChar *)text1Ptr, text1Length, &icuStatus);
273 UCollationElements * collElems2 = ucol_openElements(collator, (const UChar *)text2Ptr, text2Length, &icuStatus);
274 if (U_SUCCESS(icuStatus)) {
275 int32_t startOffset1 = 0;
276 int32_t startOffset2 = 0;
277
278 while (true) {
279 int32_t elemOrder1, elemOrder2;
280 int32_t offset1, offset2;
281
282 elemOrder1 = ucol_next(collElems1, &icuStatus);
283 elemOrder2 = ucol_next(collElems2, &icuStatus);
284 if ( U_FAILURE(icuStatus) || elemOrder1 == (int32_t)UCOL_NULLORDER || elemOrder2 == (int32_t)UCOL_NULLORDER ) {
285 break;
286 }
287
288 offset1 = ucol_getOffset(collElems1);
289 offset2 = ucol_getOffset(collElems2);
290 if ( (elemOrder1 & kMaskPrimarySecondary) == (elemOrder2 & kMaskPrimarySecondary) ) {
291 if ( (elemOrder1 & kMaskPrimaryOnly) != 0 ) {
292 // keys may differ in case, width, circling, etc.
293
294 int32_t tertiary1 = (elemOrder1 & kMaskCaseTertiary);
295 int32_t tertiary2 = (elemOrder2 & kMaskCaseTertiary);
296 // fold upper to lower case
297 if (tertiary1 >= kUpperCaseWeightMin && tertiary1 <= kUpperCaseWeightMax) {
298 tertiary1 -= kUpperToLowerDelta;
299 }
300 if (tertiary2 >= kUpperCaseWeightMin && tertiary2 <= kUpperCaseWeightMax) {
301 tertiary2 -= kUpperToLowerDelta;
302 }
303 // now compare
304 if (tertiary1 != tertiary2) {
305 orderWidth = (tertiary1 < tertiary2)? -1: 1;
306 break;
307 }
308
309 } else if ( (elemOrder1 & kMaskSecondaryOnly) != 0 ) {
310 // primary weights are both zero, but secondaries are not.
311 if ( orderCompos == 0 && (options & kCFCompareNonliteral) == 0 ) {
312 // We have a code element which is a diacritic.
313 // It may have come from a composed char or a combining char.
314 // If it came from a combining char (longer element length) it sorts first.
315 // This is only an approximation to what the Mac OS 9 code did, but this is an
316 // unusual case anyway.
317 int32_t elem1Length = offset1 - startOffset1;
318 int32_t elem2Length = offset2 - startOffset2;
319 if (elem1Length != elem2Length) {
320 orderCompos = (elem1Length > elem2Length)? -1: 1;
321 }
322 }
323 }
324 }
325
326 startOffset1 = offset1;
327 startOffset2 = offset2;
328 }
329 ucol_closeElements(collElems1);
330 ucol_closeElements(collElems2);
331 }
332
333 return (orderWidth != 0)? orderWidth: orderCompos;
334 }
335
336 static SInt32 __CompareCodePoints(const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length ) {
337 const UniChar * text1P = text1Ptr;
338 const UniChar * text2P = text2Ptr;
339 UInt32 textLimit = (text1Length <= text2Length)? text1Length: text2Length;
340 UInt32 textCounter;
341 SInt32 orderResult = 0;
342
343 // Loop through either string...the first difference differentiates this.
344 for (textCounter = 0; textCounter < textLimit && *text1P == *text2P; textCounter++) {
345 text1P++;
346 text2P++;
347 }
348 if (textCounter < textLimit) {
349 // code point difference
350 orderResult = (*text1P < *text2P) ? -1 : 1;
351 } else if (text1Length != text2Length) {
352 // one string has extra stuff at end
353 orderResult = (text1Length < text2Length) ? -1 : 1;
354 }
355 return orderResult;
356 }
357
358
359 extern const CFStringRef __kCFLocaleCollatorID;
360
361 static UCollator *__CFStringCreateCollator(CFLocaleRef compareLocale) {
362 CFStringRef canonLocaleCFStr = (CFStringRef)CFLocaleGetValue(compareLocale, __kCFLocaleCollatorID);
363 char icuLocaleStr[128] = {0};
364 CFStringGetCString(canonLocaleCFStr, icuLocaleStr, sizeof(icuLocaleStr), kCFStringEncodingASCII);
365 UErrorCode icuStatus = U_ZERO_ERROR;
366 UCollator * collator = ucol_open(icuLocaleStr, &icuStatus);
367 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
368 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &icuStatus);
369 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
370 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
371 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
372 return collator;
373 }
374
// Pool of idle collators created for the locale recorded in
// __CFDefaultCollatorLocale. __CFStringCopyDefaultCollator() takes collators
// out of the pool and __collatorFinalize() puts them back, up to
// kCFMaxCachedDefaultCollators entries. The array and the count are modified
// only while __CFDefaultCollatorLock is held.
#define kCFMaxCachedDefaultCollators (8)
static UCollator *__CFDefaultCollators[kCFMaxCachedDefaultCollators];
static CFIndex __CFDefaultCollatorsCount = 0;
// Compared by pointer identity only; the code in this file never retains it.
static const void *__CFDefaultCollatorLocale = NULL;
static CFSpinLock_t __CFDefaultCollatorLock = CFSpinLockInit;
380
381 static UCollator *__CFStringCopyDefaultCollator(CFLocaleRef compareLocale) {
382 CFLocaleRef currentLocale = NULL;
383 UCollator * collator = NULL;
384
385 if (compareLocale != __CFDefaultCollatorLocale) {
386 currentLocale = CFLocaleCopyCurrent();
387 CFRelease(currentLocale);
388 if (compareLocale != currentLocale) return NULL;
389 }
390
391 __CFSpinLock(&__CFDefaultCollatorLock);
392 if ((NULL != currentLocale) && (__CFDefaultCollatorLocale != currentLocale)) {
393 while (__CFDefaultCollatorsCount > 0) ucol_close(__CFDefaultCollators[--__CFDefaultCollatorsCount]);
394 __CFDefaultCollatorLocale = currentLocale;
395 }
396
397 if (__CFDefaultCollatorsCount > 0) collator = __CFDefaultCollators[--__CFDefaultCollatorsCount];
398 __CFSpinUnlock(&__CFDefaultCollatorLock);
399
400 if (NULL == collator) {
401 collator = __CFStringCreateCollator(compareLocale);
402 }
403
404 return collator;
405 }
406
407 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
408 static void __collatorFinalize(UCollator *collator) {
409 CFLocaleRef locale = pthread_getspecific(__CFTSDKeyCollatorLocale);
410 pthread_setspecific(__CFTSDKeyCollatorUCollator, NULL);
411 pthread_setspecific(__CFTSDKeyCollatorLocale, NULL);
412 __CFSpinLock(&__CFDefaultCollatorLock);
413 if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
414 __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
415 collator = NULL;
416 }
417 __CFSpinUnlock(&__CFDefaultCollatorLock);
418 if (NULL != collator) ucol_close(collator);
419 if (locale) CFRelease(locale);
420 }
421 #endif
422
423 // -------------------------------------------------------------------------------------------------
424 // __CompareTextDefault
425 //
426 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
427 // A negative value indicates that text1 sorts before text2.
428 // -------------------------------------------------------------------------------------------------
429 static OSStatus __CompareTextDefault(UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length, Boolean *equivalentP, SInt32 *orderP) {
430
431 // collator must have default settings restored on exit from this function
432
433 *equivalentP = true;
434 *orderP = 0;
435
436 if (options & kCFCompareNumerically) {
437 UErrorCode icuStatus = U_ZERO_ERROR;
438 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
439 }
440
441 // Most string differences are Primary. Do a primary check first, then if there
442 // are no differences do a comparison with the options in the collator.
443 UCollationResult icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
444 if (icuResult != UCOL_EQUAL) {
445 *orderP = (icuResult == UCOL_LESS) ? -2 : 2;
446 }
447 if (*orderP == 0) {
448 UErrorCode icuStatus = U_ZERO_ERROR;
449 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
450 ucol_setAttribute(collator, UCOL_STRENGTH, (options & kCFCompareDiacriticInsensitive) ? UCOL_PRIMARY : UCOL_SECONDARY, &icuStatus);
451 ucol_setAttribute(collator, UCOL_CASE_LEVEL, (options & kCFCompareCaseInsensitive) ? UCOL_OFF : UCOL_ON, &icuStatus);
452 if (!U_SUCCESS(icuStatus)) {
453 icuStatus = U_ZERO_ERROR;
454 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
455 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
456 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
457 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
458 return 666;
459 }
460
461 // We don't have a primary difference. Recompare with standard collator.
462 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
463 if (icuResult != UCOL_EQUAL) {
464 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
465 }
466 icuStatus = U_ZERO_ERROR;
467 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
468 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
469 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
470 }
471 if (*orderP == 0 && (options & kCFCompareNonliteral) == 0) {
472 *orderP = __CompareSpecials(collator, options, text1Ptr, text1Length, text2Ptr, text2Length);
473 }
474
475 *equivalentP = (*orderP == 0);
476
477 // If strings are equivalent but we care about order and have not yet checked
478 // to the level of code point order, then do some more checks for order
479 if (*orderP == 0) {
480 UErrorCode icuStatus = U_ZERO_ERROR;
481 // First try to see if ICU can find any differences above code point level
482 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
483 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &icuStatus);
484 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &icuStatus);
485 if (!U_SUCCESS(icuStatus)) {
486 icuStatus = U_ZERO_ERROR;
487 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
488 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
489 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
490 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
491 return 666;
492 }
493 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
494 if (icuResult != UCOL_EQUAL) {
495 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
496 } else {
497 // no ICU differences above code point level, compare code points
498 *orderP = __CompareCodePoints( text1Ptr, text1Length, text2Ptr, text2Length );
499 }
500 icuStatus = U_ZERO_ERROR;
501 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
502 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
503 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
504 }
505
506 if (options & kCFCompareNumerically) {
507 UErrorCode icuStatus = U_ZERO_ERROR;
508 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
509 }
510 return 0; // noErr
511 }
512
513 static inline CFIndex __extendLocationBackward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *nonBaseBMP, const uint8_t *punctBMP) {
514 while (location > 0) {
515 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
516 UTF32Char otherChar;
517 if (CFUniCharIsSurrogateLowCharacter(ch) && CFUniCharIsSurrogateHighCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location - 1)))) {
518 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
519 uint8_t planeNo = (ch >> 16);
520 if ((planeNo > 1) || (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)))) break;
521 location -= 2;
522 } else {
523 if ((!CFUniCharIsMemberOfBitmap(ch, nonBaseBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
524 --location;
525 }
526 }
527
528 return location;
529 }
530
531 static inline CFIndex __extendLocationForward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *alnumBMP, const uint8_t *punctBMP, const uint8_t *controlBMP, CFIndex strMax) {
532 do {
533 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
534 UTF32Char otherChar;
535 if (CFUniCharIsSurrogateHighCharacter(ch) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location + 1)))) {
536 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
537 location += 2;
538 uint8_t planeNo = (ch >> 16);
539 if (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, planeNo))) break;
540 } else {
541 ++location;
542 if ((!CFUniCharIsMemberOfBitmap(ch, alnumBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP) && !CFUniCharIsMemberOfBitmap(ch, controlBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
543 }
544 } while (location < strMax);
545 return location;
546 }
547
548 __private_extern__ CFComparisonResult _CFCompareStringsWithLocale(CFStringInlineBuffer *str1, CFRange str1Range, CFStringInlineBuffer *str2, CFRange str2Range, CFOptionFlags options, const void *compareLocale) {
549 const UniChar *characters1;
550 const UniChar *characters2;
551 CFComparisonResult compResult = kCFCompareEqualTo;
552 CFRange range1 = str1Range;
553 CFRange range2 = str2Range;
554 SInt32 order;
555 Boolean isEqual;
556 bool forcedOrdering = ((options & kCFCompareForcedOrdering) ? true : false);
557
558 UCollator *collator = NULL;
559 bool defaultCollator = true;
560 static const uint8_t *alnumBMP = NULL;
561 static const uint8_t *nonBaseBMP = NULL;
562 static const uint8_t *punctBMP = NULL;
563 static const uint8_t *controlBMP = NULL;
564
565 if (NULL == alnumBMP) {
566 alnumBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, 0);
567 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
568 punctBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
569 controlBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, 0);
570 }
571
572 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
573
574 range1.location = str1Range.location;
575 range2.location = str2Range.location;
576
577 // go backward
578 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
579 if (range1.location > 0) {
580 range1.location = __extendLocationBackward(range1.location - 1, str1, nonBaseBMP, punctBMP);
581 }
582
583 if (range2.location > 0) {
584 range2.location = __extendLocationBackward(range2.location - 1, str2, nonBaseBMP, punctBMP);
585 }
586
587 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
588 // First we try to use the last one used on this thread, if the locale is the same,
589 // otherwise we try to check out a default one, or then we create one.
590 UCollator *threadCollator = pthread_getspecific(__CFTSDKeyCollatorUCollator);
591 CFLocaleRef threadLocale = pthread_getspecific(__CFTSDKeyCollatorLocale);
592 if (compareLocale == threadLocale) {
593 collator = threadCollator;
594 } else {
595 #endif
596 collator = __CFStringCopyDefaultCollator((CFLocaleRef)compareLocale);
597 defaultCollator = true;
598 if (NULL == collator) {
599 collator = __CFStringCreateCollator((CFLocaleRef)compareLocale);
600 defaultCollator = false;
601 }
602 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
603 }
604 #endif
605
606 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
607 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
608
609 if ((NULL != characters1) && (NULL != characters2)) { // do fast
610 range1.length = (str1Range.location + str1Range.length) - range1.location;
611 range2.length = (str2Range.location + str2Range.length) - range2.location;
612
613 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
614 compResult = ((isEqual && !forcedOrdering) ? kCFCompareEqualTo : ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
615 } else {
616 compResult = ((memcmp(characters1, characters2, sizeof(UniChar) * range1.length) < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
617 }
618 } else {
619 UniChar *buffer1 = NULL;
620 UniChar *buffer2 = NULL;
621 UTF16Char sBuffer1[kCFStringCompareAllocationIncrement];
622 UTF16Char sBuffer2[kCFStringCompareAllocationIncrement];
623 CFIndex buffer1Len = 0, buffer2Len = 0;
624 CFIndex str1Max = str1Range.location + str1Range.length;
625 CFIndex str2Max = str2Range.location + str2Range.length;
626 CFIndex bufferSize;
627
628 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
629 do {
630 if (str1Range.location < str1Max) {
631 str1Range.location = __extendLocationForward(str1Range.location, str1, alnumBMP, punctBMP, controlBMP, str1Max);
632 range1.length = (str1Range.location - range1.location);
633 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
634
635 if (NULL == characters1) {
636 if ((0 > buffer1Len) || (range1.length > kCFStringCompareAllocationIncrement)) {
637 if (buffer1Len < range1.length) {
638 bufferSize = range1.length + (kCFStringCompareAllocationIncrement - (range1.length % kCFStringCompareAllocationIncrement));
639 if (0 == buffer1Len) {
640 buffer1 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
641 } else if (buffer1Len < range1.length) {
642 buffer1 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer1, sizeof(UTF16Char) * bufferSize, 0);
643 }
644 buffer1Len = bufferSize;
645 }
646 } else {
647 buffer1 = sBuffer1;
648 }
649
650 CFStringGetCharactersFromInlineBuffer(str1, range1, buffer1);
651 characters1 = buffer1;
652 }
653 }
654
655 if (str2Range.location < str2Max) {
656 str2Range.location = __extendLocationForward(str2Range.location, str2, alnumBMP, punctBMP, controlBMP, str2Max);
657 range2.length = (str2Range.location - range2.location);
658 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
659
660 if (NULL == characters2) {
661 if ((0 > buffer2Len) || (range2.length > kCFStringCompareAllocationIncrement)) {
662 if (buffer2Len < range2.length) {
663 bufferSize = range2.length + (kCFStringCompareAllocationIncrement - (range2.length % kCFStringCompareAllocationIncrement));
664 if (0 == buffer2Len) {
665 buffer2 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
666 } else if (buffer2Len < range2.length) {
667 buffer2 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer2, sizeof(UTF16Char) * bufferSize, 0);
668 }
669 buffer2Len = bufferSize;
670 }
671 } else {
672 buffer2 = sBuffer2;
673 }
674
675 CFStringGetCharactersFromInlineBuffer(str2, range2, buffer2);
676 characters2 = buffer2;
677 }
678 }
679
680 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
681 if (isEqual) {
682 if (forcedOrdering && (kCFCompareEqualTo == compResult) && (0 != order)) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
683 order = 0;
684 }
685 } else {
686 order = memcmp(characters1, characters2, sizeof(UTF16Char) * ((range1.length < range2.length) ? range1.length : range2.length));
687 if (0 == order) {
688 if (range1.length < range2.length) {
689 order = -2;
690 } else if (range2.length < range1.length) {
691 order = 2;
692 }
693 } else if (order < 0) {
694 --order;
695 } else if (order > 0) {
696 ++order;
697 }
698 }
699
700 if ((order < -1) || (order > 1)) break; // the result is deterministic
701
702 if (0 == order) {
703 range1.location = str1Range.location;
704 range2.location = str2Range.location;
705 }
706 } while ((str1Range.location < str1Max) || (str2Range.location < str2Max));
707
708 if (0 != order) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
709
710 if (buffer1Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer1);
711 if (buffer2Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer2);
712 }
713
714 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
715 if (collator == threadCollator) {
716 // do nothing, already cached
717 } else {
718 if (threadLocale) __collatorFinalize((UCollator *)pthread_getspecific(__CFTSDKeyCollatorUCollator)); // need to dealloc collators
719
720 pthread_key_init_np(__CFTSDKeyCollatorUCollator, (void *)__collatorFinalize);
721 pthread_setspecific(__CFTSDKeyCollatorUCollator, collator);
722 pthread_setspecific(__CFTSDKeyCollatorLocale, CFRetain(compareLocale));
723 }
724 #endif
725
726 return compResult;
727 }
728