/*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* @APPLE_LICENSE_HEADER_END@
*/
/* CFString.c
- Copyright 1998-2002, Apple, Inc. All rights reserved.
+ Copyright (c) 1998-2009, Apple Inc. All rights reserved.
Responsibility: Ali Ozer
!!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
#include <CoreFoundation/CFBase.h>
#include <CoreFoundation/CFString.h>
#include <CoreFoundation/CFDictionary.h>
-#include "CFStringEncodingConverterExt.h"
-#include "CFUniChar.h"
-#include "CFUnicodeDecomposition.h"
-#include "CFUnicodePrecomposition.h"
-#include "CFPriv.h"
+#include <CoreFoundation/CFStringEncodingConverterExt.h>
+#include <CoreFoundation/CFUniChar.h>
+#include <CoreFoundation/CFUnicodeDecomposition.h>
+#include <CoreFoundation/CFUnicodePrecomposition.h>
+#include <CoreFoundation/CFPriv.h>
#include "CFInternal.h"
+#include "CFLocaleInternal.h"
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
-#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
#include <unistd.h>
#endif
+#if DEPLOYMENT_TARGET_WINDOWS
+#define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c)
+#endif
#if defined(__GNUC__)
#define LONG_DOUBLE_SUPPORT 1
#define INSTRUMENT_SHARED_STRINGS 0
#endif
-
-__private_extern__ CFStringRef __kCFLocaleCollatorID;
+__private_extern__ const CFStringRef __kCFLocaleCollatorID;
#if INSTRUMENT_SHARED_STRINGS
#include <sys/stat.h> /* for umask() */
if (! name) name = "UNKNOWN";
umask(0);
char path[1024];
- sprintf(path, "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
+ snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
if (fd <= 0) {
int error = errno;
if (fd > 0) {
char *buffer = NULL;
char formatString[256];
- sprintf(formatString, "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
+ snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
if (buffer && resultCount > 0) write(fd, buffer, resultCount);
else puts("Couldn't record allocation event");
typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);
-#if DEPLOYMENT_TARGET_MACOSX
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
extern size_t malloc_good_size(size_t size);
#endif
extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
*/
/* Stores p as the string's content pointer, i.e. the variants.notInlineImmutable1.buffer field.
   str is cast to CFMutableStringRef to perform the write; p is cast to (void *) for the store.
   Removed (-) form: CF_WRITE_BARRIER_BASE_ASSIGN, which was also handed the string's allocator and str itself.
   Added (+) form: __CFAssignWithWriteBarrier, which is handed only the address of the field and the new value.
*/
CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
// XXX_PCB catch all writes for mutable string case.
- CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str), str, ((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
+ __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
}
/* Writes v into the string's info bit-field, held in base._cfinfo[CF_INFO_BITS], through __CFBitfieldSetValue(..., 6, 0, v).
   NOTE(review): the 6 and 0 are presumably the high and low bit positions of the field -- confirm against the macro's definition.
*/
CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
// GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
- auto_zone_release(__CFCollectableZone, buffer);
+ auto_zone_release(auto_zone(), buffer);
} else {
CFAllocatorDeallocate(alloc, buffer);
}
static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
/* CFStringGetSystemEncoding: returns the cached system encoding, computing it on the first call.
   Removed (-) form: picked the encoding with an #if chain inside the function body.
   Added (+) form: the per-platform choice is hoisted into the __defaultEncoding macro and the function just assigns it.
   In both forms the converter for the chosen encoding is looked up and its toUnicode routine is handed to
   __CFSetCharToUniCharFunc when the converter's encodingClass is kCFStringEncodingConverterCheapEightBit; NULL is passed otherwise.
   NOTE(review): converter is dereferenced without a NULL check; the removed comments state these encodings are built-in --
   confirm CFStringEncodingGetConverter cannot return NULL for every __defaultEncoding value.
*/
-CFStringEncoding CFStringGetSystemEncoding(void) {
- if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
- const CFStringEncodingConverter *converter = NULL;
-#if DEPLOYMENT_TARGET_MACOSX
- __CFDefaultSystemEncoding = kCFStringEncodingMacRoman; // MacRoman is built-in so always available
-#elif defined(__WIN32__)
- __CFDefaultSystemEncoding = kCFStringEncodingWindowsLatin1; // WinLatin1 is built-in so always available
-#elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
- __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
-#else // Solaris && HP-UX ?
- __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
+#define __defaultEncoding kCFStringEncodingMacRoman
+#elif DEPLOYMENT_TARGET_WINDOWS
+#define __defaultEncoding kCFStringEncodingWindowsLatin1
+#else
+#warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
+#define __defaultEncoding kCFStringEncodingISOLatin1
#endif
- converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
- __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
+CFStringEncoding CFStringGetSystemEncoding(void) {
+ if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) { // Not determined yet: compute and cache it
+ __CFDefaultSystemEncoding = __defaultEncoding; // Platform choice made by the #if chain above
+ const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
+ __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL); // Only cheap eight-bit converters supply the per-character function
}
-
return __CFDefaultSystemEncoding;
}
CFStringEncoding CFStringFileSystemEncoding(void) {
if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
-#if DEPLOYMENT_TARGET_MACOSX
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
__CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
#else
__CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
}
/* ??? Is returning length when no other answer is available the right thing?
+ !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
*/
CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
if (encoding == kCFStringEncodingUTF8) {
- return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? (length * 3) : (length * 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
+ if (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther)) { // 1 Unichar can expand to 3 bytes; we return 6 for older apps for compatibility
+ return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3);
+ } else {
+ return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6);
+ }
} else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
- return length * sizeof(UTF32Char);
+ return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char));
} else {
encoding &= 0xFFF; // Mask off non-base part
}
switch (encoding) {
case kCFStringEncodingUnicode:
- return length * sizeof(UniChar);
+ return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar));
case kCFStringEncodingNonLossyASCII:
- return length * 6; // 1 Unichar could expand to 6 bytes
+ return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes
case kCFStringEncodingMacRoman:
case kCFStringEncodingWindowsLatin1:
/* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
   bytes, len: the candidate byte string; only consulted by the __CFBytesInASCII test.
   encoding: the encoding the bytes are declared to be in.
   Returns true as soon as one of the two tests below passes, false if neither does.
   The removed (-) form accepted any encoding equal to the 8-bit CFString encoding; the added (+) form excludes kCFStringEncodingASCII from that shortcut.
*/
CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
- if (encoding == __CFStringGetEightBitStringEncoding()) return true;
+ // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
+ // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
+ if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true;
if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true; // Otherwise the bytes qualify only if the encoding is an ASCII superset and the bytes pass the ASCII check
return false;
}
/* Asserts (through CFAssert2, at __kCFLogAssertion) that cf is either not a fixed-length string or that reqLen does not exceed its desired capacity.
   reqLen is a length-sized value, so it is logged with %ld after an explicit cast to long; %d would read it as an int.
   reqLen is parenthesized at each use so that any argument expression binds as intended.
*/
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %ld too large", __PRETTY_FUNCTION__, (long)(reqLen))
-/* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
+/* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX).
Additional complications are applied in the following order:
- desiredCapacity, which is the minimum (except initially things can be at zero)
- rounding up to factor of 8
/* Growth policy for a required capacity c: (3 * c + 1) / 2, computed in unsigned long.
   When c >= ULONG_MAX / 3 the multiplication by 3 could wrap, so the result is __CFMax(LONG_MAX - 4095, c) instead.
   c is parenthesized under the cast so that an argument such as (a + b) is converted and multiplied as a whole.
   Note that c is evaluated more than once; pass an expression without side effects.
*/
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
-CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, CFIndex reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
+CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
if ((capacity < reqCapacity) || /* We definitely need the room... */
(!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
(!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
- CFIndex newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
+ if (reqCapacity > LONG_MAX) return -1; /* Too big any way you cut it */
+ unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
newCapacity = desiredCapacity;
} else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
}
- if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator; should we do something for */
+ if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator */
newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
-#if DEPLOYMENT_TARGET_MACOSX
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
} else {
newCapacity = malloc_good_size(newCapacity);
#endif
}
- return newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
+ return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
}
}
return capacity;
*/
/* Reports an out-of-memory condition encountered while working on obj.
   Builds a fixed user-facing message; in the added (+) form it is always logged with CFLog at kCFLogLevelCritical.
   The removed (-) form first offered the message to the callback from _CFGetOutOfMemoryErrorCallBack() and logged only
   when there was no callback or the callback returned false.
   obj is not used by the added form.
   NOTE(review): call sites in this file are annotated "Does not return", but nothing visible in this body halts or raises
   after CFLog, so control appears to fall back to the caller -- confirm the intended behavior.
*/
static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
- CFBadErrorCallBack cb = _CFGetOutOfMemoryErrorCallBack();
- if (NULL == cb || !cb(obj, CFSTR("NS/CFString"), msg)) {
+ {
CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
}
}
static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
- CFIndex newLength;
+ unsigned long newLength; // We use unsigned to better keep track of overflow
// Compute new length of the string
if (numDeleteRanges == 1) {
Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
CFIndex curCapacity = __CFStrCapacity(str);
+ if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str); // Does not return
CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
+ if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
uint8_t *newContents;
if (allocNewBuffer) {
newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
if (!newContents) { // Try allocating without extra room
newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
+ // Since we checked for this above, it shouldn't be the case here, but just in case
+ if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
- if (!newContents) {
- __CFStringHandleOutOfMemory(str);
- // Ideally control doesn't come here at all since we expect the above call to raise an exception.
- // If control comes here, there isn't much we can do.
- }
+ if (!newContents) __CFStringHandleOutOfMemory(str); // Does not return
}
} else {
newContents = (uint8_t *)curContents;
else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
else {
- sprintf(encodingBuffer, "0x%lX", (unsigned long)encoding);
+ snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding);
recordedEncoding = encodingBuffer;
}
#endif
}
// Now determine the necessary size
-
+#if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
+#endif
#if INSTRUMENT_SHARED_STRINGS
if (stringSupportsROM) {
if (stringSupportsROM) {
// Disable the string ROM if necessary
static char sDisableStringROM = -1;
- if (sDisableStringROM == -1) sDisableStringROM = !! getenv("CFStringDisableROM");
+ if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM");
if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte);
}
#endif
-#if 0
+#if DEPLOYMENT_TARGET_WINDOWS
void __CFStringCleanup (void) {
/* in case library is unloaded, release store for the constant string table */
if (constantStringTable != NULL) {
#else
CFRelease(constantStringTable);
#endif
+ constantStringTable = NULL;
}
}
#endif
if (__CFStrHasNullByte(str)) {
// Note: this is called a lot, 27000 times to open a small xcode project with one file open.
// Of these uses about 1500 are for cStrings/utf8strings.
+#if 0
+ // Only sometimes when the stars are aligned will this call return a gc pointer
+ // under GC we can only really return a pointer to the start of a GC buffer for cString use
+ // (Is there a simpler way to ask if contents isGC?)
+ CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
+ if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
+ if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) {
+ static int counter = 0;
+ printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str));
+ return NULL;
+ }
+ }
+#endif
return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
} else {
return NULL;
}
}
+extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale);
+extern void __CFLocaleSetNullLocale(struct __CFLocale *locale);
+
static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) {
CFStringRef collatorID;
const char *langID = NULL;
static const char *lastLangID = NULL;
static CFSpinLock_t lock = CFSpinLockInit;
+ if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL;
+
__CFSpinLock(&lock);
if ((NULL != lastLocale) && (lastLocale == locale)) {
__CFSpinUnlock(&lock);
}
__CFSpinUnlock(&lock);
- collatorID = CFLocaleGetValue(locale, __kCFLocaleCollatorID);
+ collatorID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID);
// This somewhat depends on the CFLocale implementation always creating a CFString for the locale identifier ???
if (__CFStrLength(collatorID) > 1) {
}
}
+
+ if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale);
+
__CFSpinLock(&lock);
lastLocale = locale;
lastLangID = langID;
}
// decompose
- if (flags & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) {
+ if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
UTF32Char original = character;
filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
character = *outCharacters;
- if ((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) {
+ if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
filledLength = 1; // reset if Roman, Greek, Cyrillic
} else if (0 == (flags & kCFCompareNonliteral)) {
character = original;
// fold case
if (flags & kCFCompareCaseInsensitive) {
const uint8_t *nonBaseBitmap;
- bool filterNonBase = (((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) ? true : false);
+ bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
static const uint8_t *lowerBMP = NULL;
static const uint8_t *caseFoldBMP = NULL;
}
// collect following combining marks
- if (flags & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) {
+ if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
const uint8_t *nonBaseBitmap;
const uint8_t *decompBitmap;
- bool doFill = (((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) ? false : true);
+ bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);
if (0 == filledLength) {
*outCharacters = character; // filledLength will be updated below on demand
if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
filledLength = 1; // For the base character
- if ((0 == (flags & kCFCompareDiacriticsInsensitiveCompatibilityMask)) || (nonBaseCharacter > 0x050F)) {
+ if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
} else {
return filledLength;
}
-#define kCFStringStackBufferLength (64)
+static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) { // Initializes *buffer for the non-alphanumeric character set when compareOptions has kCFCompareIgnoreNonAlphanumeric; returns true if it did, false (buffer untouched) otherwise
+ if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
+ static CFCharacterSetRef nonAlnumChars = NULL; // Built on first use and kept for later calls; not released in this function
+ // Lazily build the shared set
+ if (NULL == nonAlnumChars) {
+ CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(NULL, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric)); // Mutable copy of the predefined alphanumeric set...
+ CFCharacterSetInvert(cset); // ...inverted in place, so it now holds everything that is not alphanumeric
+ if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset); // Install only if the static is still NULL; if the swap fails, release our copy
+ }
+ // Point the caller's inline buffer at the shared set
+ CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);
+
+ return true;
+ }
+
+ return false;
+}
+
+#define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
-CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions, CFLocaleRef locale) {
+CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
/* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
UTF32Char strBuf1[kCFStringStackBufferLength];
UTF32Char strBuf2[kCFStringStackBufferLength];
UTF32Char str1Char, str2Char;
CFIndex str1UsedLen, str2UsedLen;
CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
+ CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
+ CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
CFIndex str2Len = CFStringGetLength(string2);
bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
- bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitiveCompatibilityMask) ? true : false);
- bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareWidthInsensitive)) ? true : false);
+ bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
+ bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
+ bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
const uint8_t *langCode;
CFComparisonResult compareResult = kCFCompareEqualTo;
UTF16Char otherChar;
Boolean freeLocale = false;
-
- #define _CFCompareStringsWithLocale(A, B, C, D, E, F) (0)
- locale = NULL;
+ CFCharacterSetInlineBuffer *ignoredChars = NULL;
+ CFCharacterSetInlineBuffer csetBuffer;
if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
locale = CFLocaleCopyCurrent();
langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale));
- if ((NULL == locale) && !numerically) { // could do binary comp (be careful when adding new flags)
+ if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
+ ignoredChars = &csetBuffer;
+ equalityOptions = true;
+ }
+
+ if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
compareOptions &= ~kCFCompareNonliteral; // remove non-literal
- if (kCFStringEncodingASCII == eightBitEncoding) {
+ if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
if (caseInsensitive) {
int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
if (str1Char != str2Char) {
if ((str1Char < 0x80) && (str2Char < 0x80)) {
+ if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
if (str1Index == limitLength) {
int cmpResult = rangeToCompare.length - str2Len;
- return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
+ return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
}
}
} else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
+ if (NULL != locale) {
+ str1LocalizedIndex = str1Index;
+ str2LocalizedIndex = str2Index;
+
+ // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
+ if (forceOrdering) {
+ diacriticsInsensitive = false;
+ compareOptions &= ~kCFCompareDiacriticInsensitive;
+ }
+ }
while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
if (strBuf1Len == 0) {
str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
- if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
+ if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
str1UsedLen = 1;
} else {
str1Char = strBuf1[strBuf1Index++];
}
if (strBuf2Len == 0) {
str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
- if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
+ if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
str2UsedLen = 1;
} else {
str2Char = strBuf2[strBuf2Index++];
}
-
+
if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
+ if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) {
+ compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
+ forcedIndex1 = str1Index;
+ forcedIndex2 = str2Index;
+ }
+
do {
intValue1 = (intValue1 * 10) + (str1Char - '0');
str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
if (str1Char != str2Char) {
if (!equalityOptions) {
- CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
+ compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
if (freeLocale && locale) {
CFRelease(locale);
}
- return res;
+ return compareResult;
}
- if ((compareOptions & kCFCompareForcedOrdering) && (kCFCompareEqualTo == compareResult)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
+ if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
+ compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
+ forcedIndex1 = str1LocalizedIndex;
+ forcedIndex2 = str2LocalizedIndex;
+ }
- if ((str1Char < 0x80) && (str2Char < 0x80)) {
+ if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
if (NULL != locale) {
- CFComparisonResult res = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale);
+ compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
if (freeLocale && locale) {
CFRelease(locale);
}
- return res;
+ return compareResult;
} else if (!caseInsensitive) {
if (freeLocale && locale) {
CFRelease(locale);
str2UsedLen = 2;
}
+ if (NULL != ignoredChars) {
+ if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
+ if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
+ if (strBuf1Len == 0) str1Index += str1UsedLen;
+ if (strBuf2Len > 0) --strBuf2Index;
+ continue;
+ }
+ if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
+ if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
+ if (strBuf2Len == 0) str2Index += str2UsedLen;
+ if (strBuf1Len > 0) -- strBuf1Index;
+ continue;
+ }
+ }
+
if (diacriticsInsensitive && (str1Index > 0)) {
bool str1Skip = false;
bool str2Skip = false;
}
if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
- CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
+ compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
if (freeLocale && locale) {
CFRelease(locale);
}
- return res;
+ return compareResult;
}
if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
strBuf2Index = 1;
}
if ((0 == strBuf2Len) || (str1Char != str2Char)) {
- CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
+ compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
if (freeLocale && locale) {
CFRelease(locale);
}
- return res;
+ return compareResult;
}
}
}
++strBuf1Index; ++strBuf2Index;
}
if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
- CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
+ CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
if (freeLocale && locale) {
CFRelease(locale);
}
if (strBuf1Len == 0) str1Index += str1UsedLen;
if (strBuf2Len == 0) str2Index += str2UsedLen;
+ if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
+ str1LocalizedIndex = str1Index;
+ str2LocalizedIndex = str2Index;
+ }
}
- if (diacriticsInsensitive) {
+ if (diacriticsInsensitive || (NULL != ignoredChars)) {
while (str1Index < rangeToCompare.length) {
str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
- if (str1Char < 0x80) break; // found ASCII
+ if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
- if (!CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) break;
+ if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;
str1Index += ((str1Char < 0x10000) ? 1 : 2);
}
while (str2Index < str2Len) {
str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
- if (str2Char < 0x80) break; // found ASCII
+ if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
- if (!CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) break;
+ if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;
str2Index += ((str2Char < 0x10000) ? 1 : 2);
}
}
+ // Need to recalc localized result here for forced ordering
+ if ((NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);
if (freeLocale && locale) {
CFRelease(locale);
}
/* Locale-less comparison entry point: forwards string, string2, rangeToCompare and compareOptions
   unchanged to CFStringCompareWithOptionsAndLocale(), passing NULL for the locale, and returns its result. */
-CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
+CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
/* Compares the whole of `string` against `str2`.
   The compared range always starts at 0 and spans CFStringGetLength(string) characters;
   the actual comparison is delegated to CFStringCompareWithOptions() with `options` passed through. */
CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) {
    const CFRange entireString = CFRangeMake(0, CFStringGetLength(string));
    return CFStringCompareWithOptions(string, str2, entireString, options);
}
-Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFLocaleRef locale, CFRange *result) {
+Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
/* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
CFIndex findStrLen = CFStringGetLength(stringToFind);
Boolean didFind = false;
- bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitiveCompatibilityMask)) ? true : false);
+ bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
+ CFCharacterSetInlineBuffer *ignoredChars = NULL;
+ CFCharacterSetInlineBuffer csetBuffer;
+
+ if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
+ ignoredChars = &csetBuffer;
+ lengthVariants = true;
+ }
if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
UTF32Char strBuf1[kCFStringStackBufferLength];
UTF32Char strBuf2[kCFStringStackBufferLength];
CFStringInlineBuffer inlineBuf1, inlineBuf2;
- UTF32Char str1Char, str2Char;
+ UTF32Char str1Char = 0, str2Char = 0;
CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
CFIndex fromLoc, toLoc;
CFIndex str1Index, str2Index;
CFIndex strBuf1Len, strBuf2Len;
+ CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
+ bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
+ bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
int8_t delta;
if (NULL == locale) {
delta = ((fromLoc <= toLoc) ? 1 : -1);
if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
- CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
uint8_t str1Byte, str2Byte;
while (1) {
strBuf1Len = 1;
}
}
+
+ if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
+ ++str1Index;
+ continue;
+ }
+
if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
*strBuf2 = str2Byte;
}
}
+ if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
+ ++str2Index;
+ continue;
+ }
+
if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
if (*strBuf1 != *strBuf2) break;
} else {
if (strBuf1Len < strBuf2Len) {
delta = strBuf2Len - strBuf1Len;
- if ((str1Index + strBuf1Len + delta) > (rangeToSearch.location + rangeToSearch.length)) break;
+ if ((str1Index + strBuf1Len + delta) > maxStr1Index) break;
characters = &(strBuf2[strBuf1Len]);
charactersLimit = characters + delta;
++str1Index; ++str2Index;
}
+ if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
+ while (str2Index < findStrLen) {
+ str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
+
+ if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
+ ++str2Index;
+ }
+ }
+
if (str2Index == findStrLen) {
- if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
+ if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
+ while (str1Index < maxStr1Index) {
+ str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
+
+ if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
+ ++str1Index;
+ }
+ }
+
+ if (!backwardAnchor || (str1Index == maxStr1Index)) {
didFind = true;
if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
}
} else if (equalityOptions) {
UTF16Char otherChar;
CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
- bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitiveCompatibilityMask) ? true : false);
+ bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
}
if (str1Char != str2Char) {
- if ((str1Char < 0x80) && (str2Char < 0x80) && ((NULL == langCode) || !caseInsensitive)) break;
+ if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;
if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
str2UsedLen = 2;
}
+ if (NULL != ignoredChars) {
+ if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
+ if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
+ if (strBuf1Len == 0) str1Index += str1UsedLen;
+ if (strBuf2Len > 0) --strBuf2Index;
+ continue;
+ }
+ if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
+ if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
+ if (strBuf2Len == 0) str2Index += str2UsedLen;
+ if (strBuf1Len > 0) -- strBuf1Index;
+ continue;
+ }
+ }
+
if (diacriticsInsensitive && (str1Index > fromLoc)) {
bool str1Skip = false;
bool str2Skip = false;
if (strBuf2Len == 0) str2Index += str2UsedLen;
}
+ if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
+ while (str2Index < findStrLen) {
+ str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
+ if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
+ str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
+ }
+ if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
+ str2Index += ((str2Char < 0x10000) ? 1 : 2);
+ }
+ }
+
if (str2Index == findStrLen) {
bool match = true;
if (strBuf1Len > 0) {
match = false;
- if ((compareOptions & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (strBuf1[0] < 0x0510)) {
+ if ((compareOptions & kCFCompareDiacriticInsensitive) && (strBuf1[0] < 0x0510)) {
while (strBuf1Index < strBuf1Len) {
if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
++strBuf1Index;
}
}
- if (match && (compareOptions & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) && (str1Index < (rangeToSearch.location + rangeToSearch.length))) {
+ if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
const uint8_t *nonBaseBitmap;
str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
} while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
if (str1Char < 0x0510) {
- CFIndex maxIndex = (rangeToSearch.location + rangeToSearch.length);
-
- while (++str1Index < maxIndex) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
+ while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
}
}
} else {
}
if (match) {
- if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
+ if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
+ while (str1Index < maxStr1Index) {
+ str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
+ if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
+ str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
+ }
+ if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
+ str1Index += ((str1Char < 0x10000) ? 1 : 2);
+ }
+ }
+
+ if (!backwardAnchor || (str1Index == maxStr1Index)) {
didFind = true;
if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
}
return didFind;
}
/* Locale-less search entry point: forwards string, stringToFind, rangeToSearch, compareOptions and result
   unchanged to CFStringFindWithOptionsAndLocale(), passing NULL for the locale, and returns its Boolean result. */
-Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
+
+Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
// Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
}
-CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions) {
+CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
CFRange foundRange;
Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
}
-CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFOptionFlags compareOptions) {
+CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) {
CFRange foundRange;
if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
return range;
}
-#if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
/* Returns the range reported by CFStringGetRangeOfCharacterClusterAtIndex() for `theIndex` in `theString`,
   always requesting the kCFStringComposedCharacterCluster cluster type. */
CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
    CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
    return clusterRange;
}
-#else
-/*!
- @function CFStringGetRangeOfComposedCharactersAtIndex
- Returns the range of the composed character sequence at the specified index.
- @param theString The CFString which is to be searched. If this
- parameter is not a valid CFString, the behavior is
- undefined.
- @param theIndex The index of the character contained in the
- composed character sequence. If the index is
- outside the index space of the string (0 to N-1 inclusive,
- where N is the length of the string), the behavior is
- undefined.
- @result The range of the composed character sequence.
-*/
-#define ExtHighHalfZoneLow 0xD800
-#define ExtHighHalfZoneHigh 0xDBFF
-#define ExtLowHalfZoneLow 0xDC00
-#define ExtLowHalfZoneHigh 0xDFFF
-#define JunseongStart 0x1160
-#define JonseongEnd 0x11F9
-CF_INLINE Boolean IsHighCode(UniChar X) { return (X >= ExtHighHalfZoneLow && X <= ExtHighHalfZoneHigh); }
-CF_INLINE Boolean IsLowCode(UniChar X) { return (X >= ExtLowHalfZoneLow && X <= ExtLowHalfZoneHigh); }
-#define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
-#define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
-CF_INLINE Boolean IsNonBaseChar(UniChar X, CFCharacterSetRef nonBaseSet) { return (CFCharacterSetIsCharacterMember(nonBaseSet, X) || IsHangulConjoiningJamo(X) || IsHalfwidthKanaVoicedMark(X) || (X & 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
-#define ZWJ 0x200D
-#define ZWNJ 0x200C
-#define COMBINING_GRAPHEME_JOINER (0x034F)
-
-static CFCharacterSetRef nonBaseChars = NULL;
-static CFCharacterSetRef letterChars = NULL;
-static const void *__CFCombiningClassBMP = NULL;
-
-CF_INLINE bool IsVirama(UTF32Char character) {
- return ((character == COMBINING_GRAPHEME_JOINER) ? true : ((character < 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character, __CFCombiningClassBMP) == 9) ? true : false));
-}
-
-CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
- CFIndex left, current, save;
- CFIndex len = CFStringGetLength(theString);
- CFStringInlineBuffer stringBuffer;
- static volatile Boolean _isInited = false;
-
- if (theIndex >= len) return CFRangeMake(kCFNotFound, 0);
-
- if (!_isInited) {
- nonBaseChars = CFCharacterSetGetPredefined(kCFCharacterSetNonBase);
- letterChars = CFCharacterSetGetPredefined(kCFCharacterSetLetter);
- __CFCombiningClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
- _isInited = true;
- }
-
- save = current = theIndex;
-
- CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, len));
-
- /*
- * First check for transcoding hints
- */
- {
- CFRange theRange = (current > MAX_TRANSCODING_LENGTH ? CFRangeMake(current - MAX_TRANSCODING_LENGTH, MAX_TRANSCODING_LENGTH + 1) : CFRangeMake(0, current + 1));
-
- // Should check the next loc ?
- if (current + 1 < len) ++theRange.length;
-
- if (theRange.length > 1) {
- UniChar characterBuffer[MAX_TRANSCODING_LENGTH + 2]; // Transcoding hint length + current loc + next loc
-
- if (stringBuffer.directBuffer) {
- memmove(characterBuffer, stringBuffer.directBuffer + theRange.location, theRange.length * sizeof(UniChar));
- } else {
- CFStringGetCharacters(theString, theRange, characterBuffer);
- }
-
- while (current >= theRange.location) {
- if ((characterBuffer[current - theRange.location] & 0x1FFFF0) == 0xF860) {
- theRange = CFRangeMake(current, __CFTranscodingHintLength[characterBuffer[current - theRange.location] - 0xF860] + 1);
- if ((theRange.location + theRange.length) <= theIndex) break;
- if ((theRange.location + theRange.length) >= len) theRange.length = len - theRange.location;
- return theRange;
- }
- if (current == 0) break;
- --current;
- }
- current = theIndex; // Reset current
- }
- }
-
-//#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
- /*
- * if we start NOT on a base, first move back to a base as appropriate.
- */
-
- roundAgain:
-
- while ((current > 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) --current;
-
- if (current >= 1 && current < len && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
- --current;
- goto roundAgain;
- } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
- current -= 2;
- goto roundAgain;
- }
-
- /*
- * Set the left position, then jump back to the saved original position.
- */
-
- if (current >= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) --current;
- left = current;
- current = save;
-
- /*
- * Now, presume we are on a base; move forward & look for the next base.
- * Handle jumping over H/L codes.
- */
- if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && (current + 1) < len && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current + 1))) ++current;
- ++current;
-
- round2Again:
-
- if (current < len) {
- while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) {
- ++current;
- if (current >= len) break;
- }
- if ((current < len) && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current))) {
- if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
- ++current; goto round2Again;
- } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
- ++current; goto round2Again;
- }
- }
- }
- /*
- * Now, "current" is a base, and "left" is a base.
- * The junk between had better contain "save"!
- */
- if ((! (left <= save)) || (! (save <= current))) {
- CFLog(kCFLogLevelWarning, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save);
- }
- return CFRangeMake(left, current - left);
-}
-#endif
/*!
@function CFStringFindCharacterFromSet
#define SURROGATE_START 0xD800
#define SURROGATE_END 0xDFFF
-CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result) {
+CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) {
CFStringInlineBuffer stringBuffer;
CFCharacterSetInlineBuffer csetBuffer;
UniChar ch;
if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
} else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
-#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
if (__CFStrIsUnicode(string)) {
CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
if (aLength > 0) guessedByteLength = aLength;
if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
}
-#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
}
#endif
}
}
-CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFOptionFlags compareOptions) {
+CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
CFRange foundRange;
Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
_CFStringAppendFormatAndArgumentsAux(outputString, NULL, formatOptions, formatString, args);
}
-#if DEPLOYMENT_TARGET_MACOSX
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#define SNPRINTF(TYPE, WHAT) { \
TYPE value = (TYPE) WHAT; \
if (-1 != specs[curSpec].widthArgNum) { \
TYPE value = (TYPE) WHAT; \
if (-1 != specs[curSpec].widthArgNum) { \
if (-1 != specs[curSpec].precArgNum) { \
- snprintf(buffer, 255, formatBuffer, width, precision, value); \
+ sprintf(buffer, formatBuffer, width, precision, value); \
} else { \
- snprintf(buffer, 255, formatBuffer, width, value); \
+ sprintf(buffer, formatBuffer, width, value); \
} \
} else { \
if (-1 != specs[curSpec].precArgNum) { \
- snprintf(buffer, 255, formatBuffer, precision, value); \
+ sprintf(buffer, formatBuffer, precision, value); \
} else { \
- snprintf(buffer, 255, formatBuffer, value); \
+ sprintf(buffer, formatBuffer, value); \
} \
}}
-#endif //__MACH__
+#endif
void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
}
// See if we need to localize the decimal point
if (formatOptions) { // We have localization info
- CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparator) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
+ CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
if (decimalSeparator != NULL) { // We have a decimal separator in there
CFIndex decimalPointLoc = 0;
while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
if (dynamicBuffer) {
CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
}
-#endif
+#endif
}
break;
case CFFormatLiteralType: