]> git.saurik.com Git - apple/cf.git/blob - CFStringUtilities.c
CF-635.tar.gz
[apple/cf.git] / CFStringUtilities.c
1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
35 #include <CoreFoundation/CFPreferences.h>
36 #include <limits.h>
37 #include <stdlib.h>
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
40 #include <string.h>
41
42 #if DEPLOYMENT_TARGET_WINDOWS
43 #include <tchar.h>
44 #endif
45
46
47 Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
48 switch (theEncoding) {
49 case kCFStringEncodingASCII: // Built-in encodings
50 case kCFStringEncodingMacRoman:
51 case kCFStringEncodingUTF8:
52 case kCFStringEncodingNonLossyASCII:
53 case kCFStringEncodingWindowsLatin1:
54 case kCFStringEncodingNextStepLatin:
55 case kCFStringEncodingUTF16:
56 case kCFStringEncodingUTF16BE:
57 case kCFStringEncodingUTF16LE:
58 case kCFStringEncodingUTF32:
59 case kCFStringEncodingUTF32BE:
60 case kCFStringEncodingUTF32LE:
61 return true;
62
63 default:
64 return CFStringEncodingIsValidEncoding(theEncoding);
65 }
66 }
67
68 const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
69 return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings();
70 }
71
72 CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
73 static CFMutableDictionaryRef mappingTable = NULL;
74 CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding) : NULL;
75
76 if (!theName) {
77 const char *encodingName = __CFStringEncodingGetName(theEncoding);
78
79 if (encodingName) {
80 theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII);
81 }
82
83 if (theName) {
84 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
85
86 CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName);
87 CFRelease(theName);
88 }
89 }
90
91 return theName;
92 }
93
94 CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
95 CFStringEncoding encoding = kCFStringEncodingInvalidId;
96 #define BUFFER_SIZE (100)
97 char buffer[BUFFER_SIZE];
98 const char *name = CFStringGetCStringPtr(charsetName, __CFStringGetEightBitStringEncoding());
99
100 if (NULL == name) {
101 if (false == CFStringGetCString(charsetName, buffer, BUFFER_SIZE, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId;
102
103 name = buffer;
104 }
105
106 encoding = __CFStringEncodingGetFromCanonicalName(name);
107
108 if (kCFStringEncodingInvalidId == encoding) encoding = __CFStringEncodingGetFromICUName(name);
109
110
111 return encoding;
112 }
113
114 CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
115 CFStringRef name = NULL;
116 CFIndex value = encoding;
117 static CFMutableDictionaryRef mappingTable = NULL;
118 static CFSpinLock_t lock = CFSpinLockInit;
119
120 __CFSpinLock(&lock);
121 name = ((NULL == mappingTable) ? NULL : (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)value));
122
123 if (NULL == name) {
124 #define STACK_BUFFER_SIZE (100)
125 char buffer[STACK_BUFFER_SIZE];
126
127 if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) name = CFStringCreateWithCString(NULL, buffer, kCFStringEncodingASCII);
128
129
130 if (NULL != name) {
131 CFIndex value = encoding;
132
133 if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
134
135 CFDictionaryAddValue(mappingTable, (const void*)value, (const void*)name);
136 CFRelease(name);
137 }
138 }
139 __CFSpinUnlock(&lock);
140
141 return name;
142 }
143
// NSStringEncoding values that have a dedicated numeric identifier. They are
// redeclared here so that this file can translate between the two numbering
// schemes without depending on Foundation headers.
enum {
    NSASCIIStringEncoding = 1, /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13, /* Greek */
    NSWindowsCP1254StringEncoding = 14, /* Turkish */
    NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
    NSISO2022JPStringEncoding = 21, /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
};

// Marker bit (bit 31) set on an NSStringEncoding that merely wraps a
// CFStringEncoding value. The constant is unsigned on purpose: shifting a
// signed 1 into bit 31 is undefined behaviour, and a negative mask would
// sign-extend when combined with an `unsigned long`, making every bit from
// 31 upwards count as the marker on LP64 targets.
#define NSENCODING_MASK (1U << 31)
167
168 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
169 switch (theEncoding & 0xFFF) {
170 case kCFStringEncodingUnicode:
171 if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
172 else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
173 break;
174
175 case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
176 case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
177
178 case kCFStringEncodingASCII: return NSASCIIStringEncoding;
179
180 case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
181 case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
182 case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
183 case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
184 case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
185 case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
186
187 case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
188 case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
189 case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
190 case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
191 case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
192 case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
193 }
194
195 return NSENCODING_MASK | theEncoding;
196 }
197
198 CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) {
199 const uint16_t encodings[] = {
200 kCFStringEncodingASCII,
201 kCFStringEncodingNextStepLatin,
202 kCFStringEncodingEUC_JP,
203 0,
204 kCFStringEncodingISOLatin1,
205 kCFStringEncodingMacSymbol,
206 kCFStringEncodingNonLossyASCII,
207 kCFStringEncodingDOSJapanese,
208 kCFStringEncodingISOLatin2,
209 kCFStringEncodingUTF16,
210 kCFStringEncodingWindowsCyrillic,
211 kCFStringEncodingWindowsLatin1,
212 kCFStringEncodingWindowsGreek,
213 kCFStringEncodingWindowsLatin5,
214 kCFStringEncodingWindowsLatin2
215 };
216
217 if (NSUTF8StringEncoding == theEncoding) return kCFStringEncodingUTF8;
218
219 if ((theEncoding > 0) && (theEncoding <= NSWindowsCP1250StringEncoding)) return encodings[theEncoding - 1];
220
221 switch (theEncoding) {
222 case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
223 case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP;
224
225 default:
226 return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
227 }
228 }
229
230 UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
231 uint16_t codepage = __CFStringEncodingGetWindowsCodePage(theEncoding);
232
233 return ((0 == codepage) ? kCFStringEncodingInvalidId : codepage);
234 }
235
236 CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
237 return __CFStringEncodingGetFromWindowsCodePage(theEncoding);
238 }
239
240 CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
241 CFStringEncoding macEncoding = __CFStringEncodingGetMostCompatibleMacScript(encoding);
242
243
244 return macEncoding;
245 }
246
247 #define kCFStringCompareAllocationIncrement (128)
248
249
250 // -------------------------------------------------------------------------------------------------
251 // CompareSpecials - ignore case & diacritic differences
252 //
253 // Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
254 // Fullwidth & halfwidth are in range FF00-FFEF
255 // Parenthesized & circled are in range 3200-32FF
256 // -------------------------------------------------------------------------------------------------
257
258 enum {
259 kUpperCaseWeightMin = 0x80 | 0x0F,
260 kUpperCaseWeightMax = 0x80 | 0x17,
261 kUpperToLowerDelta = 0x80 | 0x0A, // 0x0A = 0x0F - 0x05
262 kMaskPrimarySecondary = 0xFFFFFF00,
263 kMaskPrimaryOnly = 0xFFFF0000,
264 kMaskSecondaryOnly = 0x0000FF00,
265 kMaskCaseTertiary = 0x000000FF // 2 hi bits case, 6 lo bits tertiary
266 };
267
268 static SInt32 __CompareSpecials(const UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length) {
269 UErrorCode icuStatus = U_ZERO_ERROR;
270 SInt32 orderWidth = 0;
271 SInt32 orderCompos = 0;
272
273 UCollationElements * collElems1 = ucol_openElements(collator, (const UChar *)text1Ptr, text1Length, &icuStatus);
274 UCollationElements * collElems2 = ucol_openElements(collator, (const UChar *)text2Ptr, text2Length, &icuStatus);
275 if (U_SUCCESS(icuStatus)) {
276 int32_t startOffset1 = 0;
277 int32_t startOffset2 = 0;
278
279 while (true) {
280 int32_t elemOrder1, elemOrder2;
281 int32_t offset1, offset2;
282
283 elemOrder1 = ucol_next(collElems1, &icuStatus);
284 elemOrder2 = ucol_next(collElems2, &icuStatus);
285 if ( U_FAILURE(icuStatus) || elemOrder1 == (int32_t)UCOL_NULLORDER || elemOrder2 == (int32_t)UCOL_NULLORDER ) {
286 break;
287 }
288
289 offset1 = ucol_getOffset(collElems1);
290 offset2 = ucol_getOffset(collElems2);
291 if ( (elemOrder1 & kMaskPrimarySecondary) == (elemOrder2 & kMaskPrimarySecondary) ) {
292 if ( (elemOrder1 & kMaskPrimaryOnly) != 0 ) {
293 // keys may differ in case, width, circling, etc.
294
295 int32_t tertiary1 = (elemOrder1 & kMaskCaseTertiary);
296 int32_t tertiary2 = (elemOrder2 & kMaskCaseTertiary);
297 // fold upper to lower case
298 if (tertiary1 >= kUpperCaseWeightMin && tertiary1 <= kUpperCaseWeightMax) {
299 tertiary1 -= kUpperToLowerDelta;
300 }
301 if (tertiary2 >= kUpperCaseWeightMin && tertiary2 <= kUpperCaseWeightMax) {
302 tertiary2 -= kUpperToLowerDelta;
303 }
304 // now compare
305 if (tertiary1 != tertiary2) {
306 orderWidth = (tertiary1 < tertiary2)? -1: 1;
307 break;
308 }
309
310 } else if ( (elemOrder1 & kMaskSecondaryOnly) != 0 ) {
311 // primary weights are both zero, but secondaries are not.
312 if ( orderCompos == 0 && (options & kCFCompareNonliteral) == 0 ) {
313 // We have a code element which is a diacritic.
314 // It may have come from a composed char or a combining char.
315 // If it came from a combining char (longer element length) it sorts first.
316 // This is only an approximation to what the Mac OS 9 code did, but this is an
317 // unusual case anyway.
318 int32_t elem1Length = offset1 - startOffset1;
319 int32_t elem2Length = offset2 - startOffset2;
320 if (elem1Length != elem2Length) {
321 orderCompos = (elem1Length > elem2Length)? -1: 1;
322 }
323 }
324 }
325 }
326
327 startOffset1 = offset1;
328 startOffset2 = offset2;
329 }
330 ucol_closeElements(collElems1);
331 ucol_closeElements(collElems2);
332 }
333
334 return (orderWidth != 0)? orderWidth: orderCompos;
335 }
336
337 static SInt32 __CompareCodePoints(const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length ) {
338 const UniChar * text1P = text1Ptr;
339 const UniChar * text2P = text2Ptr;
340 UInt32 textLimit = (text1Length <= text2Length)? text1Length: text2Length;
341 UInt32 textCounter;
342 SInt32 orderResult = 0;
343
344 // Loop through either string...the first difference differentiates this.
345 for (textCounter = 0; textCounter < textLimit && *text1P == *text2P; textCounter++) {
346 text1P++;
347 text2P++;
348 }
349 if (textCounter < textLimit) {
350 // code point difference
351 orderResult = (*text1P < *text2P) ? -1 : 1;
352 } else if (text1Length != text2Length) {
353 // one string has extra stuff at end
354 orderResult = (text1Length < text2Length) ? -1 : 1;
355 }
356 return orderResult;
357 }
358
359
360 extern const CFStringRef __kCFLocaleCollatorID;
361
362 static UCollator *__CFStringCreateCollator(CFLocaleRef compareLocale) {
363 CFStringRef canonLocaleCFStr = (CFStringRef)CFLocaleGetValue(compareLocale, __kCFLocaleCollatorID);
364 char icuLocaleStr[128] = {0};
365 CFStringGetCString(canonLocaleCFStr, icuLocaleStr, sizeof(icuLocaleStr), kCFStringEncodingASCII);
366 UErrorCode icuStatus = U_ZERO_ERROR;
367 UCollator * collator = ucol_open(icuLocaleStr, &icuStatus);
368 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
369 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &icuStatus);
370 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
371 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
372 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
373 return collator;
374 }
375
376 #define kCFMaxCachedDefaultCollators (8)
377 static UCollator *__CFDefaultCollators[kCFMaxCachedDefaultCollators];
378 static CFIndex __CFDefaultCollatorsCount = 0;
379 static const void *__CFDefaultCollatorLocale = NULL;
380 static CFSpinLock_t __CFDefaultCollatorLock = CFSpinLockInit;
381
382 static UCollator *__CFStringCopyDefaultCollator(CFLocaleRef compareLocale) {
383 CFLocaleRef currentLocale = NULL;
384 UCollator * collator = NULL;
385
386 if (compareLocale != __CFDefaultCollatorLocale) {
387 currentLocale = CFLocaleCopyCurrent();
388 if (compareLocale != currentLocale) {
389 CFRelease(currentLocale);
390 return NULL;
391 }
392 }
393
394 __CFSpinLock(&__CFDefaultCollatorLock);
395 if ((NULL != currentLocale) && (__CFDefaultCollatorLocale != currentLocale)) {
396 while (__CFDefaultCollatorsCount > 0) ucol_close(__CFDefaultCollators[--__CFDefaultCollatorsCount]);
397 __CFDefaultCollatorLocale = CFRetain(currentLocale);
398 }
399
400 if (__CFDefaultCollatorsCount > 0) collator = __CFDefaultCollators[--__CFDefaultCollatorsCount];
401 __CFSpinUnlock(&__CFDefaultCollatorLock);
402
403 if (NULL == collator) {
404 collator = __CFStringCreateCollator(compareLocale);
405 }
406
407 if (NULL != currentLocale) CFRelease(currentLocale);
408
409 return collator;
410 }
411
412 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
413 static void __collatorFinalize(UCollator *collator) {
414 CFLocaleRef locale = _CFGetTSD(__CFTSDKeyCollatorLocale);
415 _CFSetTSD(__CFTSDKeyCollatorUCollator, NULL, NULL);
416 _CFSetTSD(__CFTSDKeyCollatorLocale, NULL, NULL);
417 __CFSpinLock(&__CFDefaultCollatorLock);
418 if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
419 __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
420 collator = NULL;
421 }
422 __CFSpinUnlock(&__CFDefaultCollatorLock);
423 if (NULL != collator) ucol_close(collator);
424 if (locale) CFRelease(locale);
425 }
426 #endif
427
428 // -------------------------------------------------------------------------------------------------
429 // __CompareTextDefault
430 //
431 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
432 // A negative value indicates that text1 sorts before text2.
433 // -------------------------------------------------------------------------------------------------
434 static OSStatus __CompareTextDefault(UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length, Boolean *equivalentP, SInt32 *orderP) {
435
436 // collator must have default settings restored on exit from this function
437
438 *equivalentP = true;
439 *orderP = 0;
440
441 if (options & kCFCompareNumerically) {
442 UErrorCode icuStatus = U_ZERO_ERROR;
443 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
444 }
445
446 // Most string differences are Primary. Do a primary check first, then if there
447 // are no differences do a comparison with the options in the collator.
448 UCollationResult icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
449 if (icuResult != UCOL_EQUAL) {
450 *orderP = (icuResult == UCOL_LESS) ? -2 : 2;
451 }
452 if (*orderP == 0) {
453 UErrorCode icuStatus = U_ZERO_ERROR;
454 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
455 ucol_setAttribute(collator, UCOL_STRENGTH, (options & kCFCompareDiacriticInsensitive) ? UCOL_PRIMARY : UCOL_SECONDARY, &icuStatus);
456 ucol_setAttribute(collator, UCOL_CASE_LEVEL, (options & kCFCompareCaseInsensitive) ? UCOL_OFF : UCOL_ON, &icuStatus);
457 if (!U_SUCCESS(icuStatus)) {
458 icuStatus = U_ZERO_ERROR;
459 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
460 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
461 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
462 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
463 return 666;
464 }
465
466 // We don't have a primary difference. Recompare with standard collator.
467 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
468 if (icuResult != UCOL_EQUAL) {
469 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
470 }
471 icuStatus = U_ZERO_ERROR;
472 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
473 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
474 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
475 }
476 if (*orderP == 0 && (options & kCFCompareNonliteral) == 0) {
477 *orderP = __CompareSpecials(collator, options, text1Ptr, text1Length, text2Ptr, text2Length);
478 }
479
480 *equivalentP = (*orderP == 0);
481
482 // If strings are equivalent but we care about order and have not yet checked
483 // to the level of code point order, then do some more checks for order
484 if (*orderP == 0) {
485 UErrorCode icuStatus = U_ZERO_ERROR;
486 // First try to see if ICU can find any differences above code point level
487 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
488 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &icuStatus);
489 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &icuStatus);
490 if (!U_SUCCESS(icuStatus)) {
491 icuStatus = U_ZERO_ERROR;
492 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
493 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
494 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
495 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
496 return 666;
497 }
498 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
499 if (icuResult != UCOL_EQUAL) {
500 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
501 } else {
502 // no ICU differences above code point level, compare code points
503 *orderP = __CompareCodePoints( text1Ptr, text1Length, text2Ptr, text2Length );
504 }
505 icuStatus = U_ZERO_ERROR;
506 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
507 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
508 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
509 }
510
511 if (options & kCFCompareNumerically) {
512 UErrorCode icuStatus = U_ZERO_ERROR;
513 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
514 }
515 return 0; // noErr
516 }
517
518 static inline CFIndex __extendLocationBackward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *nonBaseBMP, const uint8_t *punctBMP) {
519 while (location > 0) {
520 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
521 UTF32Char otherChar;
522 if (CFUniCharIsSurrogateLowCharacter(ch) && CFUniCharIsSurrogateHighCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location - 1)))) {
523 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
524 uint8_t planeNo = (ch >> 16);
525 if ((planeNo > 1) || (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)))) break;
526 location -= 2;
527 } else {
528 if ((!CFUniCharIsMemberOfBitmap(ch, nonBaseBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
529 --location;
530 }
531 }
532
533 return location;
534 }
535
536 static inline CFIndex __extendLocationForward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *alnumBMP, const uint8_t *punctBMP, const uint8_t *controlBMP, CFIndex strMax) {
537 do {
538 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
539 UTF32Char otherChar;
540 if (CFUniCharIsSurrogateHighCharacter(ch) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location + 1)))) {
541 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
542 location += 2;
543 uint8_t planeNo = (ch >> 16);
544 if (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, planeNo))) break;
545 } else {
546 ++location;
547 if ((!CFUniCharIsMemberOfBitmap(ch, alnumBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP) && !CFUniCharIsMemberOfBitmap(ch, controlBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
548 }
549 } while (location < strMax);
550 return location;
551 }
552
553 __private_extern__ CFComparisonResult _CFCompareStringsWithLocale(CFStringInlineBuffer *str1, CFRange str1Range, CFStringInlineBuffer *str2, CFRange str2Range, CFOptionFlags options, const void *compareLocale) {
554 const UniChar *characters1;
555 const UniChar *characters2;
556 CFComparisonResult compResult = kCFCompareEqualTo;
557 CFRange range1 = str1Range;
558 CFRange range2 = str2Range;
559 SInt32 order;
560 Boolean isEqual;
561 bool forcedOrdering = ((options & kCFCompareForcedOrdering) ? true : false);
562
563 UCollator *collator = NULL;
564 bool defaultCollator = true;
565 static const uint8_t *alnumBMP = NULL;
566 static const uint8_t *nonBaseBMP = NULL;
567 static const uint8_t *punctBMP = NULL;
568 static const uint8_t *controlBMP = NULL;
569
570 if (NULL == alnumBMP) {
571 alnumBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, 0);
572 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
573 punctBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
574 controlBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, 0);
575 }
576
577 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
578
579 range1.location = str1Range.location;
580 range2.location = str2Range.location;
581
582 // go backward
583 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
584 if (range1.location > 0) {
585 range1.location = __extendLocationBackward(range1.location - 1, str1, nonBaseBMP, punctBMP);
586 }
587
588 if (range2.location > 0) {
589 range2.location = __extendLocationBackward(range2.location - 1, str2, nonBaseBMP, punctBMP);
590 }
591
592 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
593 // First we try to use the last one used on this thread, if the locale is the same,
594 // otherwise we try to check out a default one, or then we create one.
595 UCollator *threadCollator = _CFGetTSD(__CFTSDKeyCollatorUCollator);
596 CFLocaleRef threadLocale = _CFGetTSD(__CFTSDKeyCollatorLocale);
597 if (compareLocale == threadLocale) {
598 collator = threadCollator;
599 } else {
600 #endif
601 collator = __CFStringCopyDefaultCollator((CFLocaleRef)compareLocale);
602 defaultCollator = true;
603 if (NULL == collator) {
604 collator = __CFStringCreateCollator((CFLocaleRef)compareLocale);
605 defaultCollator = false;
606 }
607 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
608 }
609 #endif
610
611 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
612 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
613
614 if ((NULL != characters1) && (NULL != characters2)) { // do fast
615 range1.length = (str1Range.location + str1Range.length) - range1.location;
616 range2.length = (str2Range.location + str2Range.length) - range2.location;
617
618 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
619 compResult = ((isEqual && !forcedOrdering) ? kCFCompareEqualTo : ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
620 } else {
621 compResult = ((memcmp(characters1, characters2, sizeof(UniChar) * range1.length) < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
622 }
623 } else {
624 UniChar *buffer1 = NULL;
625 UniChar *buffer2 = NULL;
626 UTF16Char sBuffer1[kCFStringCompareAllocationIncrement];
627 UTF16Char sBuffer2[kCFStringCompareAllocationIncrement];
628 CFIndex buffer1Len = 0, buffer2Len = 0;
629 CFIndex str1Max = str1Range.location + str1Range.length;
630 CFIndex str2Max = str2Range.location + str2Range.length;
631 CFIndex bufferSize;
632
633 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
634 do {
635 if (str1Range.location < str1Max) {
636 str1Range.location = __extendLocationForward(str1Range.location, str1, alnumBMP, punctBMP, controlBMP, str1Max);
637 range1.length = (str1Range.location - range1.location);
638 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
639
640 if (NULL == characters1) {
641 if ((0 > buffer1Len) || (range1.length > kCFStringCompareAllocationIncrement)) {
642 if (buffer1Len < range1.length) {
643 bufferSize = range1.length + (kCFStringCompareAllocationIncrement - (range1.length % kCFStringCompareAllocationIncrement));
644 if (0 == buffer1Len) {
645 buffer1 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
646 } else if (buffer1Len < range1.length) {
647 buffer1 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer1, sizeof(UTF16Char) * bufferSize, 0);
648 }
649 buffer1Len = bufferSize;
650 }
651 } else {
652 buffer1 = sBuffer1;
653 }
654
655 CFStringGetCharactersFromInlineBuffer(str1, range1, buffer1);
656 characters1 = buffer1;
657 }
658 }
659
660 if (str2Range.location < str2Max) {
661 str2Range.location = __extendLocationForward(str2Range.location, str2, alnumBMP, punctBMP, controlBMP, str2Max);
662 range2.length = (str2Range.location - range2.location);
663 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
664
665 if (NULL == characters2) {
666 if ((0 > buffer2Len) || (range2.length > kCFStringCompareAllocationIncrement)) {
667 if (buffer2Len < range2.length) {
668 bufferSize = range2.length + (kCFStringCompareAllocationIncrement - (range2.length % kCFStringCompareAllocationIncrement));
669 if (0 == buffer2Len) {
670 buffer2 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
671 } else if (buffer2Len < range2.length) {
672 buffer2 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer2, sizeof(UTF16Char) * bufferSize, 0);
673 }
674 buffer2Len = bufferSize;
675 }
676 } else {
677 buffer2 = sBuffer2;
678 }
679
680 CFStringGetCharactersFromInlineBuffer(str2, range2, buffer2);
681 characters2 = buffer2;
682 }
683 }
684
685 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
686 if (isEqual) {
687 if (forcedOrdering && (kCFCompareEqualTo == compResult) && (0 != order)) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
688 order = 0;
689 }
690 } else {
691 order = memcmp(characters1, characters2, sizeof(UTF16Char) * ((range1.length < range2.length) ? range1.length : range2.length));
692 if (0 == order) {
693 if (range1.length < range2.length) {
694 order = -2;
695 } else if (range2.length < range1.length) {
696 order = 2;
697 }
698 } else if (order < 0) {
699 --order;
700 } else if (order > 0) {
701 ++order;
702 }
703 }
704
705 if ((order < -1) || (order > 1)) break; // the result is deterministic
706
707 if (0 == order) {
708 range1.location = str1Range.location;
709 range2.location = str2Range.location;
710 }
711 } while ((str1Range.location < str1Max) || (str2Range.location < str2Max));
712
713 if (0 != order) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
714
715 if (buffer1Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer1);
716 if (buffer2Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer2);
717 }
718
719 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
720 if (collator == threadCollator) {
721 // do nothing, already cached
722 } else {
723 if (threadLocale) __collatorFinalize((UCollator *)_CFGetTSD(__CFTSDKeyCollatorUCollator)); // need to dealloc collators
724
725 _CFSetTSD(__CFTSDKeyCollatorUCollator, collator, (void *)__collatorFinalize);
726 _CFSetTSD(__CFTSDKeyCollatorLocale, (void *)CFRetain(compareLocale), NULL);
727 }
728 #endif
729
730 return compResult;
731 }
732