/*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* @APPLE_LICENSE_HEADER_END@
*/
/* CFString.h
- Copyright (c) 1998-2007, Apple Inc. All rights reserved.
+ Copyright (c) 1998-2009, Apple Inc. All rights reserved.
*/
#if !defined(__COREFOUNDATION_CFSTRING__)
/*
Please note: CFStrings are conceptually an array of Unicode characters.
However, in general, how a CFString stores this array is an implementation
-detail. For instance, CFString might choose to use an array of 8-bit characters;
-to store its contents; or it might use multiple blocks of memory; or whatever.
-Furthermore, the implementation might change depending on the default
-system encoding, the user's language, the OS, or even a given release.
+detail. For instance, CFString might choose to use an array of 8-bit characters
+to store its contents, or it might use multiple blocks of memory, or whatever.
+This is especially true since CFString is toll-free bridged with NSString, enabling
+any NSString instance to be used as a CFString. Furthermore, the implementation
+may change depending on the default system encoding, the user's language,
+or even a release or update of the OS.
What this means is that you should use the following advanced functions with care:
In your usages of these functions, if you get a NULL return, use the non-Ptr version
of the functions as shown in this example:
- Str255 buffer;
- StringPtr ptr = CFStringGetPascalStringPtr(str, encoding);
+ char buffer[BUFSIZE];
+ const char *ptr = CFStringGetCStringPtr(str, encoding);
if (ptr == NULL) {
- if (CFStringGetPascalString(str, buffer, 256, encoding)) ptr = buffer;
+ if (CFStringGetCString(str, buffer, BUFSIZE, encoding)) ptr = buffer;
}
-Note that CFStringGetPascalString() or CFStringGetCString() calls might still fail --- but
+Note that CFStringGetCString() or CFStringGetPascalString() calls might still fail --- but
that will happen in two circumstances only: The conversion from the UniChar contents of CFString
to the specified encoding fails, or the buffer is too small. If they fail, that means
the conversion was not possible.
-If you need a copy of the buffer in the above example, you might consider simply
-calling CFStringGetPascalString() in all cases --- CFStringGetPascalStringPtr()
-is simply an optimization.
+If you need a copy of the buffer in the above example, you might consider simply calling
+CFStringGetCString() in all cases --- CFStringGetCStringPtr() is simply an optimization.
In addition, the following functions, which create immutable CFStrings from developer
supplied buffers without copying the buffers, might have to actually copy
kCFStringEncodingUnicode = 0x0100, /* kTextEncodingUnicodeDefault + kTextEncodingDefaultFormat (aka kUnicode16BitFormat) */
kCFStringEncodingUTF8 = 0x08000100, /* kTextEncodingUnicodeDefault + kUnicodeUTF8Format */
kCFStringEncodingNonLossyASCII = 0x0BFF /* 7bit Unicode variants used by Cocoa & Java */
-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
+#if MAC_OS_X_VERSION_10_4 <= MAC_OS_X_VERSION_MAX_ALLOWED
,
kCFStringEncodingUTF16 = 0x0100, /* kTextEncodingUnicodeDefault + kUnicodeUTF16Format (alias of kCFStringEncodingUnicode) */
kCFStringEncodingUTF16BE = 0x10000100, /* kTextEncodingUnicodeDefault + kUnicodeUTF16BEFormat */
kCFStringEncodingUTF32 = 0x0c000100, /* kTextEncodingUnicodeDefault + kUnicodeUTF32Format */
kCFStringEncodingUTF32BE = 0x18000100, /* kTextEncodingUnicodeDefault + kUnicodeUTF32BEFormat */
kCFStringEncodingUTF32LE = 0x1c000100 /* kTextEncodingUnicodeDefault + kUnicodeUTF32LEFormat */
-#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4 */
+#endif /* MAC_OS_X_VERSION_10_4 <= MAC_OS_X_VERSION_MAX_ALLOWED */
};
typedef CFStringEncoding CFStringBuiltInEncodings;
+
/* CFString type ID */
CF_EXPORT
CFTypeID CFStringGetTypeID(void);
-/* Macro to allow creation of compile-time constant strings; the argument should be a constant string.
+/* CFSTR() allows creation of compile-time constant CFStringRefs; the argument
+should be a constant C-string.
CFSTR(), not being a "Copy" or "Create" function, does not return a new
reference for you. So, you should not release the return value. This is
a CFSTR() return value to a function such as SetMenuItemWithCFString(), the
function can retain it, then later, when it's done with it, it can release it.
-At this point non-7 bit characters (that is, characters > 127) in CFSTR() are not
-supported and using them will lead to unpredictable results. This includes escaped
-(\nnn) characters whose values are > 127. Even if it works for you in testing,
-it might not work for a user with a different language preference.
+Non-7 bit characters (that is, above 127) in CFSTR() are supported, although care must
+be taken in dealing with files containing them. If you can trust your editor and tools
+to deal with non-ASCII characters in the source code, then you can use them directly
+in CFSTR(); otherwise, you can represent such characters with their escaped octal
+equivalents in the encoding the compiler will use to interpret them (for instance,
+O-umlaut is \303\226 in UTF-8). UTF-8 is the recommended encoding here,
+since it is the default choice with Mac OS X developer tools.
*/
+#if TARGET_OS_WIN32
+#undef __CONSTANT_CFSTRINGS__
+#endif
+
#ifdef __CONSTANT_CFSTRINGS__
#define CFSTR(cStr) ((CFStringRef) __builtin___CFStringMakeConstantString ("" cStr ""))
#else
#define CFSTR(cStr) __CFStringMakeConstantString("" cStr "")
#endif
+#if defined(__GNUC__) && (__GNUC__*10+__GNUC_MINOR__ >= 42) && !defined(__INTEL_COMPILER) && (TARGET_OS_MAC || TARGET_OS_EMBEDDED)
+#define CF_FORMAT_FUNCTION(F,A) __attribute__((format(CFString, F, A)))
+#define CF_FORMAT_ARGUMENT(A) __attribute__((format_arg(A)))
+#else
+#define CF_FORMAT_FUNCTION(F,A)
+#define CF_FORMAT_ARGUMENT(A)
+#endif
+
/*** Immutable string creation functions ***/
/* Functions to create basic immutable strings. The provided allocator is used for all memory activity in these functions.
CF_EXPORT
CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator);
-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_4
+#if MAC_OS_X_VERSION_10_4 <= MAC_OS_X_VERSION_MAX_ALLOWED
/* The following takes an explicit length, and allows you to specify whether the data is an external format --- that is, whether to pay attention to the BOM character (if any) and do byte swapping if necessary
*/
CF_EXPORT
/* These functions create a CFString from the provided printf-like format string and arguments.
*/
CF_EXPORT
-CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...);
+CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) CF_FORMAT_FUNCTION(3,4);
CF_EXPORT
-CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments);
+CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) CF_FORMAT_FUNCTION(3,0);
/* Functions to create mutable strings. "maxLength", if not 0, is a hard bound on the length of the string. If 0, there is no limit on the length.
*/
const UniChar *CFStringGetCharactersPtr(CFStringRef theString); /* May return NULL at any time; be prepared for NULL */
/* The primitive conversion routine; allows you to convert a string piece at a time
- into a fixed size buffer. Returns number of characters converted.
+ into a fixed size buffer. Returns number of characters converted.
Characters that cannot be converted to the specified encoding are represented
- with the byte specified by lossByte; if lossByte is 0, then lossy conversion
- is not allowed and conversion stops, returning partial results.
+ with the byte specified by lossByte; if lossByte is 0, then lossy conversion
+ is not allowed and conversion stops, returning partial results.
Pass buffer==NULL if you don't care about the converted string (but just the convertability,
- or number of bytes required).
+ or number of bytes required).
maxBufLength indicates the maximum number of bytes to generate. It is ignored when buffer==NULL.
Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
Setting isExternalRepresentation causes any extra bytes that would allow
- the data to be made persistent to be included; for instance, the Unicode BOM.
+ the data to be made persistent to be included; for instance, the Unicode BOM. Note that
+ CFString prepends UTF encoded data with the Unicode BOM <http://www.unicode.org/faq/utf_bom.html>
+ when generating external representation if the target encoding allows. It's important to note that
+ only UTF-8, UTF-16, and UTF-32 define the handling of the byte order mark character, and the "LE"
+ and "BE" variants of UTF-16 and UTF-32 don't.
*/
CF_EXPORT
CFIndex CFStringGetBytes(CFStringRef theString, CFRange range, CFStringEncoding encoding, UInt8 lossByte, Boolean isExternalRepresentation, UInt8 *buffer, CFIndex maxBufLen, CFIndex *usedBufLen);
/* Convenience functions String <-> Data. These generate "external" formats, that is, formats that
- can be written out to disk. For instance, if the encoding is Unicode, CFStringCreateFromExternalRepresentation()
- pays attention to the BOM character (if any) and does byte swapping if necessary.
- Similarly CFStringCreateExternalRepresentation() will always include a BOM character if the encoding is
- Unicode. See above for description of lossByte.
+ can be written out to disk. For instance, if the encoding is Unicode,
+ CFStringCreateFromExternalRepresentation() pays attention to the BOM character (if any)
+ and does byte swapping if necessary. Similarly CFStringCreateExternalRepresentation() will
+ include a BOM character if appropriate. See CFStringGetBytes() for more on this and lossByte.
*/
CF_EXPORT
CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding); /* May return NULL on conversion error */
/*** Comparison functions. ***/
-/* Find and compare flags; these are OR'ed together as compareOptions or searchOptions in the various functions.
- This typedef doesn't appear in the functions; instead the argument is CFOptionFlags.
+/* Find and compare flags; these are OR'ed together and provided as CFStringCompareFlags in the various functions.
*/
enum {
kCFCompareCaseInsensitive = 1,
kCFCompareNonliteral = 16, /* If specified, loose equivalence is performed (o-umlaut == o, umlaut) */
kCFCompareLocalized = 32, /* User's default locale is used for the comparisons */
kCFCompareNumerically = 64 /* Numeric comparison is used; that is, Foo2.txt < Foo7.txt < Foo25.txt */
-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5
+#if MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED
,
kCFCompareDiacriticInsensitive = 128, /* If specified, ignores diacritics (o-umlaut == o) */
kCFCompareWidthInsensitive = 256, /* If specified, ignores width differences ('a' == UFF41) */
kCFCompareForcedOrdering = 512 /* If specified, comparisons are forced to return either kCFCompareLessThan or kCFCompareGreaterThan if the strings are equivalent but not strictly equal, for stability when sorting (e.g. "aaa" > "AAA" with kCFCompareCaseInsensitive specified) */
-#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5 */
+#endif /* MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED */
};
typedef CFOptionFlags CFStringCompareFlags;
kCFCompareNumerically, added in 10.2, does not work if kCFCompareLocalized is specified on systems before 10.3
kCFCompareBackwards and kCFCompareAnchored are not applicable.
*/
-#if !defined (__WIN32__)
-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5
+#if MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED
CF_EXPORT
-CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef theString1, CFStringRef theString2, CFRange rangeToCompare, CFOptionFlags compareOptions, CFLocaleRef locale) AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER;
-#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5 */
-#endif //__WIN32__
+CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef theString1, CFStringRef theString2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER;
+#endif /* MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED */
/* Comparison convenience. Uses the current user locale (the return value from CFLocaleCopyCurrent()) if kCFCompareLocalized.
*/
CF_EXPORT
-CFComparisonResult CFStringCompareWithOptions(CFStringRef theString1, CFStringRef theString2, CFRange rangeToCompare, CFOptionFlags compareOptions);
+CFComparisonResult CFStringCompareWithOptions(CFStringRef theString1, CFStringRef theString2, CFRange rangeToCompare, CFStringCompareFlags compareOptions);
/* Comparison convenience suitable for passing as sorting functions.
kCFCompareNumerically, added in 10.2, does not work if kCFCompareLocalized is specified on systems before 10.3
kCFCompareBackwards and kCFCompareAnchored are not applicable.
*/
CF_EXPORT
-CFComparisonResult CFStringCompare(CFStringRef theString1, CFStringRef theString2, CFOptionFlags compareOptions);
+CFComparisonResult CFStringCompare(CFStringRef theString1, CFStringRef theString2, CFStringCompareFlags compareOptions);
/* CFStringFindWithOptionsAndLocale() returns the found range in the CFRange * argument; you can pass NULL for simple discovery check.
locale == NULL indicates canonical locale (the return value from CFLocaleGetSystem()).
If stringToFind is the empty string (zero length), nothing is found.
Ignores the kCFCompareNumerically option.
*/
-#if !defined (__WIN32__)
-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5
+#if MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED
CF_EXPORT
-Boolean CFStringFindWithOptionsAndLocale(CFStringRef theString, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags searchOptions, CFLocaleRef locale, CFRange *result) AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER;
-#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5 */
-#endif //__WIN32__
+Boolean CFStringFindWithOptionsAndLocale(CFStringRef theString, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFLocaleRef locale, CFRange *result) AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER;
+#endif /* MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED */
/* Find convenience. Uses the current user locale (the return value from CFLocaleCopyCurrent()) if kCFCompareLocalized.
*/
CF_EXPORT
-Boolean CFStringFindWithOptions(CFStringRef theString, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result);
+Boolean CFStringFindWithOptions(CFStringRef theString, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result);
/* CFStringCreateArrayWithFindResults() returns an array of CFRange pointers, or NULL if there are no matches.
Overlapping instances are not found; so looking for "AA" in "AAA" finds just one range.
Ignores the kCFCompareNumerically option.
*/
CF_EXPORT
-CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef theString, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions);
+CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef theString, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions);
/* Find conveniences; see comments above concerning empty string and options.
*/
CF_EXPORT
-CFRange CFStringFind(CFStringRef theString, CFStringRef stringToFind, CFOptionFlags compareOptions);
+CFRange CFStringFind(CFStringRef theString, CFStringRef stringToFind, CFStringCompareFlags compareOptions);
CF_EXPORT
Boolean CFStringHasPrefix(CFStringRef theString, CFStringRef prefix);
@result true, if at least a character which is a member of the character
set is found and result is filled, otherwise, false.
*/
-CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result);
+CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result);
#endif
/* Find range of bounds of the line(s) that span the indicated range (startIndex, numChars),
void CFStringAppendCString(CFMutableStringRef theString, const char *cStr, CFStringEncoding encoding);
CF_EXPORT
-void CFStringAppendFormat(CFMutableStringRef theString, CFDictionaryRef formatOptions, CFStringRef format, ...);
+void CFStringAppendFormat(CFMutableStringRef theString, CFDictionaryRef formatOptions, CFStringRef format, ...) CF_FORMAT_FUNCTION(3,4);
CF_EXPORT
-void CFStringAppendFormatAndArguments(CFMutableStringRef theString, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments);
+void CFStringAppendFormatAndArguments(CFMutableStringRef theString, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) CF_FORMAT_FUNCTION(3,0);
CF_EXPORT
void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr);
Returns number of replacements performed.
*/
CF_EXPORT
-CFIndex CFStringFindAndReplace(CFMutableStringRef theString, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFOptionFlags compareOptions);
+CFIndex CFStringFindAndReplace(CFMutableStringRef theString, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions);
#endif
CF_EXPORT void CFStringNormalize(CFMutableStringRef theString, CFStringNormalizationForm theForm);
#endif
-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5
+#if MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED
/*!
@function CFStringFold
Folds the string into the form specified by the flags.
CF_EXPORT
void CFStringFold(CFMutableStringRef theString, CFOptionFlags theFlags, CFLocaleRef theLocale) AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER;
-#endif /* MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5 */
+#endif /* MAC_OS_X_VERSION_10_5 <= MAC_OS_X_VERSION_MAX_ALLOWED */
/* Perform string transliteration. The transformation represented by transform is applied to the given range of string, modifying it in place. Only the specified range will be modified, but the transform may look at portions of the string outside that range for context. NULL range pointer causes the whole string to be transformed. On return, range is modified to reflect the new range corresponding to the original range. reverse indicates that the inverse transform should be used instead, if it exists. If the transform is successful, true is returned; if unsuccessful, false. Reasons for the transform being unsuccessful include an invalid transform identifier, or attempting to reverse an irreversible transform.
You can pass one of the predefined transforms below, or any valid ICU transform ID as defined in the ICU User Guide. Note that we do not support arbitrary set of ICU transform rules.
*/
+CF_EXPORT
Boolean CFStringTransform(CFMutableStringRef string, CFRange *range, CFStringRef transform, Boolean reverse) AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER;
/* Transform identifiers for CFStringTransform()
+/* UTF-16 surrogate support
+ */
+CF_INLINE Boolean CFStringIsSurrogateHighCharacter(UniChar character) {
+ return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
+}
+
+CF_INLINE Boolean CFStringIsSurrogateLowCharacter(UniChar character) {
+ return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false);
+}
+
+CF_INLINE UTF32Char CFStringGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
+ return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL;
+}
+// Maps a UTF-32 character to a pair of UTF-16 surrogate characters. The buffer pointed by surrogates has to have space for at least 2 UTF-16 characters. Returns true if mapped to a surrogate pair.
+CF_INLINE Boolean CFStringGetSurrogatePairForLongCharacter(UTF32Char character, UniChar *surrogates) {
+ if ((character > 0xFFFFUL) && (character < 0x110000UL)) { // Non-BMP character
+ character -= 0x10000;
+ if (NULL != surrogates) {
+ surrogates[0] = (UniChar)((character >> 10) + 0xD800UL);
+ surrogates[1] = (UniChar)((character & 0x3FF) + 0xDC00UL);
+ }
+ return true;
+ } else {
+ if (NULL != surrogates) *surrogates = (UniChar)character;
+ return false;
+ }
+}
/* Rest of the stuff in this file is private and should not be used directly
*/
-/* For debugging only
+/* For debugging only; output goes to stderr
Use CFShow() to printf the description of any CFType;
Use CFShowStr() to printf detailed info about a CFString
*/
CF_EXTERN_C_END
#endif /* ! __COREFOUNDATION_CFSTRING__ */
-