]> git.saurik.com Git - apple/cf.git/blame - CFStringUtilities.c
CF-635.15.tar.gz
[apple/cf.git] / CFStringUtilities.c
CommitLineData
9ce05555 1/*
8ca704e1 2 * Copyright (c) 2011 Apple Inc. All rights reserved.
9ce05555
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
9ce05555
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
f64f9b69 23
9ce05555 24/* CFStringUtilities.c
8ca704e1 25 Copyright (c) 1999-2011, Apple Inc. All rights reserved.
9ce05555
A
26 Responsibility: Aki Inoue
27*/
28
29#include "CFInternal.h"
cf7d2af9
A
30#include <CoreFoundation/CFStringEncodingConverterExt.h>
31#include <CoreFoundation/CFUniChar.h>
d8925383 32#include <CoreFoundation/CFStringEncodingExt.h>
cf7d2af9
A
33#include "CFStringEncodingDatabase.h"
34#include "CFICUConverters.h"
bd5b749c 35#include <CoreFoundation/CFPreferences.h>
9ce05555 36#include <limits.h>
9ce05555 37#include <stdlib.h>
cf7d2af9
A
38#include <unicode/ucol.h>
39#include <unicode/ucoleitr.h>
40#include <string.h>
41
42#if DEPLOYMENT_TARGET_WINDOWS
9ce05555
A
43#include <tchar.h>
44#endif
45
46
9ce05555
A
47Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
48 switch (theEncoding) {
49 case kCFStringEncodingASCII: // Built-in encodings
50 case kCFStringEncodingMacRoman:
9ce05555
A
51 case kCFStringEncodingUTF8:
52 case kCFStringEncodingNonLossyASCII:
53 case kCFStringEncodingWindowsLatin1:
54 case kCFStringEncodingNextStepLatin:
d8925383
A
55 case kCFStringEncodingUTF16:
56 case kCFStringEncodingUTF16BE:
57 case kCFStringEncodingUTF16LE:
58 case kCFStringEncodingUTF32:
59 case kCFStringEncodingUTF32BE:
60 case kCFStringEncodingUTF32LE:
9ce05555
A
61 return true;
62
63 default:
64 return CFStringEncodingIsValidEncoding(theEncoding);
65 }
66}
67
68const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
bd5b749c 69 return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings();
9ce05555
A
70}
71
72CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
73 static CFMutableDictionaryRef mappingTable = NULL;
bd5b749c 74 CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding) : NULL;
9ce05555
A
75
76 if (!theName) {
cf7d2af9
A
77 const char *encodingName = __CFStringEncodingGetName(theEncoding);
78
79 if (encodingName) {
80 theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII);
d8925383 81 }
cf7d2af9 82
d8925383 83 if (theName) {
bd5b749c 84 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
9ce05555 85
bd5b749c 86 CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName);
d8925383 87 CFRelease(theName);
9ce05555
A
88 }
89 }
90
91 return theName;
92}
93
cf7d2af9
A
94CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
95 CFStringEncoding encoding = kCFStringEncodingInvalidId;
96#define BUFFER_SIZE (100)
97 char buffer[BUFFER_SIZE];
98 const char *name = CFStringGetCStringPtr(charsetName, __CFStringGetEightBitStringEncoding());
d8925383 99
cf7d2af9
A
100 if (NULL == name) {
101 if (false == CFStringGetCString(charsetName, buffer, BUFFER_SIZE, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId;
9ce05555 102
cf7d2af9
A
103 name = buffer;
104 }
9ce05555 105
cf7d2af9 106 encoding = __CFStringEncodingGetFromCanonicalName(name);
9ce05555 107
cf7d2af9 108 if (kCFStringEncodingInvalidId == encoding) encoding = __CFStringEncodingGetFromICUName(name);
9ce05555 109
9ce05555 110
cf7d2af9 111 return encoding;
9ce05555
A
112}
113
114CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
cf7d2af9
A
115 CFStringRef name = NULL;
116 CFIndex value = encoding;
9ce05555 117 static CFMutableDictionaryRef mappingTable = NULL;
cf7d2af9 118 static CFSpinLock_t lock = CFSpinLockInit;
9ce05555 119
cf7d2af9
A
120 __CFSpinLock(&lock);
121 name = ((NULL == mappingTable) ? NULL : (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)value));
9ce05555 122
cf7d2af9
A
123 if (NULL == name) {
124#define STACK_BUFFER_SIZE (100)
125 char buffer[STACK_BUFFER_SIZE];
9ce05555 126
cf7d2af9
A
127 if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) name = CFStringCreateWithCString(NULL, buffer, kCFStringEncodingASCII);
128
129
130 if (NULL != name) {
131 CFIndex value = encoding;
132
133 if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
134
135 CFDictionaryAddValue(mappingTable, (const void*)value, (const void*)name);
136 CFRelease(name);
9ce05555
A
137 }
138 }
cf7d2af9 139 __CFSpinUnlock(&lock);
9ce05555 140
cf7d2af9 141 return name;
9ce05555
A
142}
143
/* NSStringEncoding values used by the CFStringEncoding <-> NSStringEncoding converters below. */
enum {
    NSASCIIStringEncoding = 1,		/* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11,    /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12,    /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13,    /* Greek */
    NSWindowsCP1254StringEncoding = 14,    /* Turkish */
    NSWindowsCP1250StringEncoding = 15,    /* WinLatin2 */
    NSISO2022JPStringEncoding = 21,         /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536    /* Installation-specific encoding */
};

/*
 * Bit OR'ed into a CFStringEncoding to form an NSStringEncoding value when the encoding has no
 * dedicated NS constant, and stripped again on the way back.
 *
 * Written as an unsigned shift: (1 << 31) shifts into the sign bit of a signed int, which is
 * undefined behaviour in C and sign-extends when combined with a wider unsigned type.
 */
#define NSENCODING_MASK (1U << 31)
167
bd5b749c 168unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
d8925383 169 switch (theEncoding & 0xFFF) {
cf7d2af9
A
170 case kCFStringEncodingUnicode:
171 if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
172 else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
173 break;
174
9ce05555
A
175 case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
176 case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
cf7d2af9
A
177
178 case kCFStringEncodingASCII: return NSASCIIStringEncoding;
179
d8925383 180 case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
d8925383
A
181 case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
182 case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
183 case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
184 case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
cf7d2af9 185 case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
d8925383 186
cf7d2af9
A
187 case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
188 case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
189 case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
190 case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
191 case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
192 case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
9ce05555 193 }
cf7d2af9
A
194
195 return NSENCODING_MASK | theEncoding;
9ce05555
A
196}
197
bd5b749c 198CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) {
cf7d2af9
A
199 const uint16_t encodings[] = {
200 kCFStringEncodingASCII,
201 kCFStringEncodingNextStepLatin,
202 kCFStringEncodingEUC_JP,
203 0,
204 kCFStringEncodingISOLatin1,
205 kCFStringEncodingMacSymbol,
206 kCFStringEncodingNonLossyASCII,
207 kCFStringEncodingDOSJapanese,
208 kCFStringEncodingISOLatin2,
209 kCFStringEncodingUTF16,
210 kCFStringEncodingWindowsCyrillic,
211 kCFStringEncodingWindowsLatin1,
212 kCFStringEncodingWindowsGreek,
213 kCFStringEncodingWindowsLatin5,
214 kCFStringEncodingWindowsLatin2
215 };
216
217 if (NSUTF8StringEncoding == theEncoding) return kCFStringEncodingUTF8;
218
219 if ((theEncoding > 0) && (theEncoding <= NSWindowsCP1250StringEncoding)) return encodings[theEncoding - 1];
220
9ce05555 221 switch (theEncoding) {
9ce05555 222 case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
d8925383 223 case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP;
cf7d2af9 224
9ce05555
A
225 default:
226 return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
227 }
228}
229
cf7d2af9
A
230UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
231 uint16_t codepage = __CFStringEncodingGetWindowsCodePage(theEncoding);
9ce05555 232
cf7d2af9
A
233 return ((0 == codepage) ? kCFStringEncodingInvalidId : codepage);
234}
9ce05555 235
cf7d2af9
A
236CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
237 return __CFStringEncodingGetFromWindowsCodePage(theEncoding);
238}
239
240CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
241 CFStringEncoding macEncoding = __CFStringEncodingGetMostCompatibleMacScript(encoding);
d8925383 242
cf7d2af9
A
243
244 return macEncoding;
245}
246
// Element count of the on-stack comparison buffers and the granularity by which heap
// comparison buffers are sized in _CFCompareStringsWithLocale.
#define kCFStringCompareAllocationIncrement (128)


// -------------------------------------------------------------------------------------------------
//	CompareSpecials - ignore case & diacritic differences
//
//	Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
//	Fullwidth & halfwidth are in range FF00-FFEF
//	Parenthesized & circled are in range 3200-32FF
// -------------------------------------------------------------------------------------------------

// Masks and case-weight bounds applied to ICU collation element orders in __CompareSpecials.
enum {
    // Inclusive bounds of the case/tertiary byte values that are folded to lower case
    kUpperCaseWeightMin	= 0x80 | 0x0F,
    kUpperCaseWeightMax	= 0x80 | 0x17,
    // Amount subtracted from a case/tertiary byte to fold it
    kUpperToLowerDelta	= 0x80 | 0x0A,	// 0x0A = 0x0F - 0x05
    // Bit fields of a collation element order
    kMaskPrimarySecondary	= 0xFFFFFF00,
    kMaskPrimaryOnly	= 0xFFFF0000,
    kMaskSecondaryOnly	= 0x0000FF00,
    kMaskCaseTertiary	= 0x000000FF	// 2 hi bits case, 6 lo bits tertiary
};
267
cf7d2af9
A
268static SInt32 __CompareSpecials(const UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length) {
269 UErrorCode icuStatus = U_ZERO_ERROR;
270 SInt32 orderWidth = 0;
271 SInt32 orderCompos = 0;
272
273 UCollationElements * collElems1 = ucol_openElements(collator, (const UChar *)text1Ptr, text1Length, &icuStatus);
274 UCollationElements * collElems2 = ucol_openElements(collator, (const UChar *)text2Ptr, text2Length, &icuStatus);
275 if (U_SUCCESS(icuStatus)) {
276 int32_t startOffset1 = 0;
277 int32_t startOffset2 = 0;
278
279 while (true) {
280 int32_t elemOrder1, elemOrder2;
281 int32_t offset1, offset2;
282
283 elemOrder1 = ucol_next(collElems1, &icuStatus);
284 elemOrder2 = ucol_next(collElems2, &icuStatus);
285 if ( U_FAILURE(icuStatus) || elemOrder1 == (int32_t)UCOL_NULLORDER || elemOrder2 == (int32_t)UCOL_NULLORDER ) {
286 break;
287 }
288
289 offset1 = ucol_getOffset(collElems1);
290 offset2 = ucol_getOffset(collElems2);
291 if ( (elemOrder1 & kMaskPrimarySecondary) == (elemOrder2 & kMaskPrimarySecondary) ) {
292 if ( (elemOrder1 & kMaskPrimaryOnly) != 0 ) {
293 // keys may differ in case, width, circling, etc.
294
295 int32_t tertiary1 = (elemOrder1 & kMaskCaseTertiary);
296 int32_t tertiary2 = (elemOrder2 & kMaskCaseTertiary);
297 // fold upper to lower case
298 if (tertiary1 >= kUpperCaseWeightMin && tertiary1 <= kUpperCaseWeightMax) {
299 tertiary1 -= kUpperToLowerDelta;
300 }
301 if (tertiary2 >= kUpperCaseWeightMin && tertiary2 <= kUpperCaseWeightMax) {
302 tertiary2 -= kUpperToLowerDelta;
303 }
304 // now compare
305 if (tertiary1 != tertiary2) {
306 orderWidth = (tertiary1 < tertiary2)? -1: 1;
307 break;
308 }
309
310 } else if ( (elemOrder1 & kMaskSecondaryOnly) != 0 ) {
311 // primary weights are both zero, but secondaries are not.
312 if ( orderCompos == 0 && (options & kCFCompareNonliteral) == 0 ) {
313 // We have a code element which is a diacritic.
314 // It may have come from a composed char or a combining char.
315 // If it came from a combining char (longer element length) it sorts first.
316 // This is only an approximation to what the Mac OS 9 code did, but this is an
317 // unusual case anyway.
318 int32_t elem1Length = offset1 - startOffset1;
319 int32_t elem2Length = offset2 - startOffset2;
320 if (elem1Length != elem2Length) {
321 orderCompos = (elem1Length > elem2Length)? -1: 1;
322 }
323 }
324 }
325 }
326
327 startOffset1 = offset1;
328 startOffset2 = offset2;
329 }
330 ucol_closeElements(collElems1);
331 ucol_closeElements(collElems2);
332 }
333
334 return (orderWidth != 0)? orderWidth: orderCompos;
335}
d8925383 336
cf7d2af9
A
337static SInt32 __CompareCodePoints(const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length ) {
338 const UniChar * text1P = text1Ptr;
339 const UniChar * text2P = text2Ptr;
340 UInt32 textLimit = (text1Length <= text2Length)? text1Length: text2Length;
341 UInt32 textCounter;
342 SInt32 orderResult = 0;
343
344 // Loop through either string...the first difference differentiates this.
345 for (textCounter = 0; textCounter < textLimit && *text1P == *text2P; textCounter++) {
346 text1P++;
347 text2P++;
348 }
349 if (textCounter < textLimit) {
350 // code point difference
351 orderResult = (*text1P < *text2P) ? -1 : 1;
352 } else if (text1Length != text2Length) {
353 // one string has extra stuff at end
354 orderResult = (text1Length < text2Length) ? -1 : 1;
355 }
356 return orderResult;
357}
d8925383 358
d8925383 359
cf7d2af9
A
360extern const CFStringRef __kCFLocaleCollatorID;
361
362static UCollator *__CFStringCreateCollator(CFLocaleRef compareLocale) {
363 CFStringRef canonLocaleCFStr = (CFStringRef)CFLocaleGetValue(compareLocale, __kCFLocaleCollatorID);
364 char icuLocaleStr[128] = {0};
365 CFStringGetCString(canonLocaleCFStr, icuLocaleStr, sizeof(icuLocaleStr), kCFStringEncodingASCII);
366 UErrorCode icuStatus = U_ZERO_ERROR;
367 UCollator * collator = ucol_open(icuLocaleStr, &icuStatus);
368 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
369 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &icuStatus);
370 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
371 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
372 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
373 return collator;
374}
d8925383 375
cf7d2af9
A
376#define kCFMaxCachedDefaultCollators (8)
377static UCollator *__CFDefaultCollators[kCFMaxCachedDefaultCollators];
378static CFIndex __CFDefaultCollatorsCount = 0;
379static const void *__CFDefaultCollatorLocale = NULL;
380static CFSpinLock_t __CFDefaultCollatorLock = CFSpinLockInit;
d8925383 381
cf7d2af9
A
382static UCollator *__CFStringCopyDefaultCollator(CFLocaleRef compareLocale) {
383 CFLocaleRef currentLocale = NULL;
384 UCollator * collator = NULL;
d8925383 385
cf7d2af9
A
386 if (compareLocale != __CFDefaultCollatorLocale) {
387 currentLocale = CFLocaleCopyCurrent();
8ca704e1
A
388 if (compareLocale != currentLocale) {
389 CFRelease(currentLocale);
390 return NULL;
391 }
9ce05555 392 }
d8925383 393
cf7d2af9
A
394 __CFSpinLock(&__CFDefaultCollatorLock);
395 if ((NULL != currentLocale) && (__CFDefaultCollatorLocale != currentLocale)) {
396 while (__CFDefaultCollatorsCount > 0) ucol_close(__CFDefaultCollators[--__CFDefaultCollatorsCount]);
8ca704e1 397 __CFDefaultCollatorLocale = CFRetain(currentLocale);
cf7d2af9 398 }
9ce05555 399
cf7d2af9
A
400 if (__CFDefaultCollatorsCount > 0) collator = __CFDefaultCollators[--__CFDefaultCollatorsCount];
401 __CFSpinUnlock(&__CFDefaultCollatorLock);
9ce05555 402
cf7d2af9
A
403 if (NULL == collator) {
404 collator = __CFStringCreateCollator(compareLocale);
405 }
9ce05555 406
8ca704e1
A
407 if (NULL != currentLocale) CFRelease(currentLocale);
408
cf7d2af9
A
409 return collator;
410}
9ce05555 411
cf7d2af9
A
412#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
413static void __collatorFinalize(UCollator *collator) {
8ca704e1
A
414 CFLocaleRef locale = _CFGetTSD(__CFTSDKeyCollatorLocale);
415 _CFSetTSD(__CFTSDKeyCollatorUCollator, NULL, NULL);
416 _CFSetTSD(__CFTSDKeyCollatorLocale, NULL, NULL);
cf7d2af9
A
417 __CFSpinLock(&__CFDefaultCollatorLock);
418 if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
419 __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
420 collator = NULL;
9ce05555 421 }
cf7d2af9
A
422 __CFSpinUnlock(&__CFDefaultCollatorLock);
423 if (NULL != collator) ucol_close(collator);
424 if (locale) CFRelease(locale);
9ce05555 425}
d8925383 426#endif
9ce05555 427
cf7d2af9
A
428// -------------------------------------------------------------------------------------------------
429// __CompareTextDefault
430//
431// A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
432// A negative value indicates that text1 sorts before text2.
433// -------------------------------------------------------------------------------------------------
434static OSStatus __CompareTextDefault(UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length, Boolean *equivalentP, SInt32 *orderP) {
435
436 // collator must have default settings restored on exit from this function
437
438 *equivalentP = true;
439 *orderP = 0;
440
441 if (options & kCFCompareNumerically) {
442 UErrorCode icuStatus = U_ZERO_ERROR;
443 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
444 }
445
446 // Most string differences are Primary. Do a primary check first, then if there
447 // are no differences do a comparison with the options in the collator.
448 UCollationResult icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
449 if (icuResult != UCOL_EQUAL) {
450 *orderP = (icuResult == UCOL_LESS) ? -2 : 2;
451 }
452 if (*orderP == 0) {
453 UErrorCode icuStatus = U_ZERO_ERROR;
454 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
455 ucol_setAttribute(collator, UCOL_STRENGTH, (options & kCFCompareDiacriticInsensitive) ? UCOL_PRIMARY : UCOL_SECONDARY, &icuStatus);
456 ucol_setAttribute(collator, UCOL_CASE_LEVEL, (options & kCFCompareCaseInsensitive) ? UCOL_OFF : UCOL_ON, &icuStatus);
457 if (!U_SUCCESS(icuStatus)) {
458 icuStatus = U_ZERO_ERROR;
459 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
460 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
461 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
462 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
463 return 666;
464 }
465
466 // We don't have a primary difference. Recompare with standard collator.
467 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
468 if (icuResult != UCOL_EQUAL) {
469 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
470 }
471 icuStatus = U_ZERO_ERROR;
472 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
473 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
474 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
475 }
476 if (*orderP == 0 && (options & kCFCompareNonliteral) == 0) {
477 *orderP = __CompareSpecials(collator, options, text1Ptr, text1Length, text2Ptr, text2Length);
478 }
479
480 *equivalentP = (*orderP == 0);
481
482 // If strings are equivalent but we care about order and have not yet checked
483 // to the level of code point order, then do some more checks for order
484 if (*orderP == 0) {
485 UErrorCode icuStatus = U_ZERO_ERROR;
486 // First try to see if ICU can find any differences above code point level
487 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
488 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &icuStatus);
489 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &icuStatus);
490 if (!U_SUCCESS(icuStatus)) {
491 icuStatus = U_ZERO_ERROR;
492 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
493 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
494 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
495 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
496 return 666;
497 }
498 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
499 if (icuResult != UCOL_EQUAL) {
500 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
501 } else {
502 // no ICU differences above code point level, compare code points
503 *orderP = __CompareCodePoints( text1Ptr, text1Length, text2Ptr, text2Length );
504 }
505 icuStatus = U_ZERO_ERROR;
506 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
507 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
508 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
509 }
510
511 if (options & kCFCompareNumerically) {
512 UErrorCode icuStatus = U_ZERO_ERROR;
513 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
514 }
515 return 0; // noErr
516}
517
518static inline CFIndex __extendLocationBackward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *nonBaseBMP, const uint8_t *punctBMP) {
519 while (location > 0) {
520 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
521 UTF32Char otherChar;
522 if (CFUniCharIsSurrogateLowCharacter(ch) && CFUniCharIsSurrogateHighCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location - 1)))) {
523 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
524 uint8_t planeNo = (ch >> 16);
525 if ((planeNo > 1) || (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)))) break;
526 location -= 2;
527 } else {
528 if ((!CFUniCharIsMemberOfBitmap(ch, nonBaseBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
529 --location;
530 }
531 }
532
533 return location;
534}
535
536static inline CFIndex __extendLocationForward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *alnumBMP, const uint8_t *punctBMP, const uint8_t *controlBMP, CFIndex strMax) {
537 do {
538 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
539 UTF32Char otherChar;
540 if (CFUniCharIsSurrogateHighCharacter(ch) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location + 1)))) {
541 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
542 location += 2;
543 uint8_t planeNo = (ch >> 16);
544 if (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, planeNo))) break;
545 } else {
546 ++location;
547 if ((!CFUniCharIsMemberOfBitmap(ch, alnumBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP) && !CFUniCharIsMemberOfBitmap(ch, controlBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
548 }
549 } while (location < strMax);
550 return location;
551}
552
553__private_extern__ CFComparisonResult _CFCompareStringsWithLocale(CFStringInlineBuffer *str1, CFRange str1Range, CFStringInlineBuffer *str2, CFRange str2Range, CFOptionFlags options, const void *compareLocale) {
554 const UniChar *characters1;
555 const UniChar *characters2;
556 CFComparisonResult compResult = kCFCompareEqualTo;
557 CFRange range1 = str1Range;
558 CFRange range2 = str2Range;
559 SInt32 order;
560 Boolean isEqual;
561 bool forcedOrdering = ((options & kCFCompareForcedOrdering) ? true : false);
562
563 UCollator *collator = NULL;
564 bool defaultCollator = true;
565 static const uint8_t *alnumBMP = NULL;
566 static const uint8_t *nonBaseBMP = NULL;
567 static const uint8_t *punctBMP = NULL;
568 static const uint8_t *controlBMP = NULL;
569
570 if (NULL == alnumBMP) {
571 alnumBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, 0);
572 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
573 punctBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
574 controlBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, 0);
575 }
576
577 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
578
579 range1.location = str1Range.location;
580 range2.location = str2Range.location;
581
582 // go backward
583 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
584 if (range1.location > 0) {
585 range1.location = __extendLocationBackward(range1.location - 1, str1, nonBaseBMP, punctBMP);
586 }
587
588 if (range2.location > 0) {
589 range2.location = __extendLocationBackward(range2.location - 1, str2, nonBaseBMP, punctBMP);
590 }
591
592#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
593 // First we try to use the last one used on this thread, if the locale is the same,
594 // otherwise we try to check out a default one, or then we create one.
8ca704e1
A
595 UCollator *threadCollator = _CFGetTSD(__CFTSDKeyCollatorUCollator);
596 CFLocaleRef threadLocale = _CFGetTSD(__CFTSDKeyCollatorLocale);
cf7d2af9
A
597 if (compareLocale == threadLocale) {
598 collator = threadCollator;
9ce05555 599 } else {
d8925383 600#endif
cf7d2af9
A
601 collator = __CFStringCopyDefaultCollator((CFLocaleRef)compareLocale);
602 defaultCollator = true;
603 if (NULL == collator) {
604 collator = __CFStringCreateCollator((CFLocaleRef)compareLocale);
605 defaultCollator = false;
606 }
607#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
9ce05555 608 }
cf7d2af9 609#endif
d8925383 610
cf7d2af9
A
611 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
612 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
9ce05555 613
cf7d2af9
A
614 if ((NULL != characters1) && (NULL != characters2)) { // do fast
615 range1.length = (str1Range.location + str1Range.length) - range1.location;
616 range2.length = (str2Range.location + str2Range.length) - range2.location;
9ce05555 617
cf7d2af9
A
618 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
619 compResult = ((isEqual && !forcedOrdering) ? kCFCompareEqualTo : ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
620 } else {
621 compResult = ((memcmp(characters1, characters2, sizeof(UniChar) * range1.length) < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
622 }
623 } else {
624 UniChar *buffer1 = NULL;
625 UniChar *buffer2 = NULL;
626 UTF16Char sBuffer1[kCFStringCompareAllocationIncrement];
627 UTF16Char sBuffer2[kCFStringCompareAllocationIncrement];
628 CFIndex buffer1Len = 0, buffer2Len = 0;
629 CFIndex str1Max = str1Range.location + str1Range.length;
630 CFIndex str2Max = str2Range.location + str2Range.length;
631 CFIndex bufferSize;
632
633 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
634 do {
635 if (str1Range.location < str1Max) {
636 str1Range.location = __extendLocationForward(str1Range.location, str1, alnumBMP, punctBMP, controlBMP, str1Max);
637 range1.length = (str1Range.location - range1.location);
638 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
639
640 if (NULL == characters1) {
641 if ((0 > buffer1Len) || (range1.length > kCFStringCompareAllocationIncrement)) {
642 if (buffer1Len < range1.length) {
643 bufferSize = range1.length + (kCFStringCompareAllocationIncrement - (range1.length % kCFStringCompareAllocationIncrement));
644 if (0 == buffer1Len) {
645 buffer1 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
646 } else if (buffer1Len < range1.length) {
647 buffer1 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer1, sizeof(UTF16Char) * bufferSize, 0);
648 }
649 buffer1Len = bufferSize;
650 }
651 } else {
652 buffer1 = sBuffer1;
653 }
9ce05555 654
cf7d2af9
A
655 CFStringGetCharactersFromInlineBuffer(str1, range1, buffer1);
656 characters1 = buffer1;
657 }
658 }
659
660 if (str2Range.location < str2Max) {
661 str2Range.location = __extendLocationForward(str2Range.location, str2, alnumBMP, punctBMP, controlBMP, str2Max);
662 range2.length = (str2Range.location - range2.location);
663 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
664
665 if (NULL == characters2) {
666 if ((0 > buffer2Len) || (range2.length > kCFStringCompareAllocationIncrement)) {
667 if (buffer2Len < range2.length) {
668 bufferSize = range2.length + (kCFStringCompareAllocationIncrement - (range2.length % kCFStringCompareAllocationIncrement));
669 if (0 == buffer2Len) {
670 buffer2 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
671 } else if (buffer2Len < range2.length) {
672 buffer2 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer2, sizeof(UTF16Char) * bufferSize, 0);
673 }
674 buffer2Len = bufferSize;
675 }
676 } else {
677 buffer2 = sBuffer2;
678 }
679
680 CFStringGetCharactersFromInlineBuffer(str2, range2, buffer2);
681 characters2 = buffer2;
682 }
683 }
684
685 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
686 if (isEqual) {
687 if (forcedOrdering && (kCFCompareEqualTo == compResult) && (0 != order)) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
688 order = 0;
689 }
690 } else {
691 order = memcmp(characters1, characters2, sizeof(UTF16Char) * ((range1.length < range2.length) ? range1.length : range2.length));
692 if (0 == order) {
693 if (range1.length < range2.length) {
694 order = -2;
695 } else if (range2.length < range1.length) {
696 order = 2;
697 }
698 } else if (order < 0) {
699 --order;
700 } else if (order > 0) {
701 ++order;
702 }
703 }
704
705 if ((order < -1) || (order > 1)) break; // the result is deterministic
706
707 if (0 == order) {
708 range1.location = str1Range.location;
709 range2.location = str2Range.location;
710 }
711 } while ((str1Range.location < str1Max) || (str2Range.location < str2Max));
9ce05555 712
cf7d2af9
A
713 if (0 != order) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
714
715 if (buffer1Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer1);
716 if (buffer2Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer2);
717 }
718
719#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
720 if (collator == threadCollator) {
721 // do nothing, already cached
722 } else {
8ca704e1 723 if (threadLocale) __collatorFinalize((UCollator *)_CFGetTSD(__CFTSDKeyCollatorUCollator)); // need to dealloc collators
cf7d2af9 724
8ca704e1
A
725 _CFSetTSD(__CFTSDKeyCollatorUCollator, collator, (void *)__collatorFinalize);
726 _CFSetTSD(__CFTSDKeyCollatorLocale, (void *)CFRetain(compareLocale), NULL);
cf7d2af9
A
727 }
728#endif
729
730 return compResult;
731}
d8925383 732