/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
- *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
#include "CFInternal.h"
#include <CoreFoundation/CFString.h>
#include <CoreFoundation/CFByteOrder.h>
-#include "CFUtilities.h"
+#include "CFUtilitiesPriv.h"
#include <string.h>
#include "CFStringEncodingConverterExt.h"
#include "CFUniChar.h"
static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
- __CFWantsToUseASCIICompatibleConversion = false;
+ __CFWantsToUseASCIICompatibleConversion = false;
}
return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
}
#define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))
-#if defined(__BIG_ENDIAN__)
-#define SHOULD_SWAP(BOM) (BOM == 0xFFFE)
-#else
-#define SHOULD_SWAP(BOM) (BOM != 0xFEFF)
-#endif
-
/* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
};
Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
- UInt32 idx;
- const UniChar *uniChars = (const UniChar *)bytes;
- const uint8_t *chars = (const uint8_t *)bytes;
- const uint8_t *end = chars + len;
- uint16_t bom;
- Boolean allASCII = false;
if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
buffer->isASCII = !alwaysUnicode;
buffer->shouldFreeChars = false;
buffer->numChars = 0;
+
if (0 == len) return true;
buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
- switch (encoding) {
- case kCFStringEncodingUnicode:
- bom = (*uniChars == 0xfffe || *uniChars == 0xfeff) ? (*uniChars++) : 0;
- /* If the byte order mark is missing, we assume big endian... */
- len = len / 2 - (0 == bom ? 0 : 1);
-
- if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
- if (SHOULD_SWAP(bom)) {
- for (idx = 0; idx < len; idx++) if ((uniChars[idx] & 0x80ff) != 0) {buffer->isASCII = false; break;}
- } else {
- for (idx = 0; idx < len; idx++) if (uniChars[idx] > 127) {buffer->isASCII = false; break;}
- }
+
+ if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
+ const UTF16Char *src = (const UTF16Char *)bytes;
+ const UTF16Char *limit = (const UTF16Char *)(bytes + len);
+ bool swap = false;
+
+ if (kCFStringEncodingUTF16 == encoding) {
+ UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
+
+#if defined(__BIG_ENDIAN__)
+ if (bom == 0xFFFE) swap = true;
+#else
+ if (bom != 0xFEFF) swap = true;
+#endif
+ if (bom) useClientsMemoryPtr = NULL;
+ } else {
+#if defined(__BIG_ENDIAN__)
+ if (kCFStringEncodingUTF16LE == encoding) swap = true;
+#else
+ if (kCFStringEncodingUTF16BE == encoding) swap = true;
+#endif
}
- if (buffer->isASCII) {
- buffer->numChars = len;
- buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
- buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
- if (SHOULD_SWAP(bom)) { // !!! Can be somewhat trickier here and use a single loop with a properly inited ptr
- for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = (uniChars[idx] >> 8);
- } else {
- for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = uniChars[idx];
- }
+ buffer->numChars = limit - src;
+
+ if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
+ *useClientsMemoryPtr = true;
+ buffer->chars.unicode = (UniChar *)src;
+ buffer->isASCII = false;
} else {
- buffer->numChars = len;
- if (useClientsMemoryPtr && (bom == 0) && !SHOULD_SWAP(bom)) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
- *useClientsMemoryPtr = true;
- buffer->shouldFreeChars = false;
- buffer->chars.unicode = (UniChar *)bytes;
- } else {
- buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
- buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
- if (SHOULD_SWAP(bom)) {
- for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = CFSwapInt16(uniChars[idx]);
- } else {
- memmove(buffer->chars.unicode, uniChars, len * sizeof(UniChar));
+ if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
+ const UTF16Char *characters = src;
+ UTF16Char mask = (swap ? 0x80FF : 0xFF80);
+
+ while (characters < limit) {
+ if (*(characters++) & mask) {
+ buffer->isASCII = false;
+ break;
+ }
}
}
- }
- return true;
-
- case kCFStringEncodingNonLossyASCII: {
- UTF16Char currentValue = 0;
- uint8_t character;
- int8_t mode = __NSNonLossyASCIIMode;
-
- buffer->isASCII = false;
- buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
- buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
- buffer->numChars = 0;
-
- while (chars < end) {
- character = (*chars++);
-
- switch (mode) {
- case __NSNonLossyASCIIMode:
- if (character == '\\') {
- mode = __NSNonLossyBackslashMode;
- } else if (character < 0x80) {
- currentValue = character;
- } else {
- mode = __NSNonLossyErrorMode;
+
+ if (buffer->isASCII) {
+ uint8_t *dst;
+ if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
+ if (buffer->numChars > MAX_LOCAL_CHARS) {
+ buffer->chars.ascii = CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
+ buffer->shouldFreeChars = true;
+ } else {
+ buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
+ }
}
- break;
-
- case __NSNonLossyBackslashMode:
- if ((character == 'U') || (character == 'u')) {
- mode = __NSNonLossyHexInitialMode;
- currentValue = 0;
- } else if ((character >= '0') && (character <= '9')) {
- mode = __NSNonLossyOctalInitialMode;
- currentValue = character - '0';
- } else if (character == '\\') {
- mode = __NSNonLossyASCIIMode;
- currentValue = character;
+ dst = buffer->chars.ascii;
+
+ if (swap) {
+ while (src < limit) *(dst++) = (*(src++) >> 8);
} else {
- mode = __NSNonLossyErrorMode;
+ while (src < limit) *(dst++) = *(src++);
}
- break;
+ } else {
+ UTF16Char *dst;
- default:
- if (mode < __NSNonLossyHexFinalMode) {
- if ((character >= '0') && (character <= '9')) {
- currentValue = (currentValue << 4) | (character - '0');
- if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
+ if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
+ if (buffer->numChars > MAX_LOCAL_UNICHARS) {
+ buffer->chars.unicode = CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
+ buffer->shouldFreeChars = true;
} else {
- if (character >= 'a') character -= ('a' - 'A');
- if ((character >= 'A') && (character <= 'F')) {
- currentValue = (currentValue << 4) | ((character - 'A') + 10);
- if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
- } else {
- mode = __NSNonLossyErrorMode;
- }
+ buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
}
+ }
+ dst = buffer->chars.unicode;
+
+ if (swap) {
+ while (src < limit) *(dst++) = CFSwapInt16(*(src++));
} else {
- if ((character >= '0') && (character <= '9')) {
- currentValue = (currentValue << 3) | (character - '0');
- if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
- } else {
- mode = __NSNonLossyErrorMode;
- }
+ memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
}
- break;
}
+ }
+ } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
+ const UTF32Char *src = (const UTF32Char *)bytes;
+ const UTF32Char *limit = (const UTF32Char *)(bytes + len);
+ bool swap = false;
- if (mode == __NSNonLossyASCIIMode) {
- buffer->chars.unicode[buffer->numChars++] = currentValue;
- } else if (mode == __NSNonLossyErrorMode) {
- return false;
- }
- }
- return (mode == __NSNonLossyASCIIMode);
- }
+ if (kCFStringEncodingUTF32 == encoding) {
+ UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
- case kCFStringEncodingUTF8:
- if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
- chars += 3;
- len -= 3;
- if (0 == len) return true;
+#if defined(__BIG_ENDIAN__)
+ if (bom == 0xFFFE0000) swap = true;
+#else
+ if (bom != 0x0000FEFF) swap = true;
+#endif
+ } else {
+#if defined(__BIG_ENDIAN__)
+ if (kCFStringEncodingUTF32LE == encoding) swap = true;
+#else
+ if (kCFStringEncodingUTF32BE == encoding) swap = true;
+#endif
}
- allASCII = !alwaysUnicode;
- if (allASCII) {
- for (idx = 0; idx < len; idx++) {
- if (128 <= chars[idx]) {
- allASCII = false;
- break;
+
+ buffer->numChars = limit - src;
+
+ {
+ // Let's see if we have non-ASCII or non-BMP
+ const UTF32Char *characters = src;
+ UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
+ UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
+
+ while (characters < limit) {
+ if (*characters & asciiMask) {
+ buffer->isASCII = false;
+ if (*characters & bmpMask) ++(buffer->numChars);
}
+ ++characters;
}
}
- buffer->isASCII = allASCII;
- if (allASCII) {
- buffer->numChars = len;
- buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
- buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
- memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
- } else {
- UInt32 numDone;
- static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
-
- if (!__CFFromUTF8) {
- const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
- __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
+
+ if (buffer->isASCII) {
+ uint8_t *dst;
+ if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
+ if (buffer->numChars > MAX_LOCAL_CHARS) {
+ buffer->chars.ascii = CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
+ buffer->shouldFreeChars = true;
+ } else {
+ buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
+ }
}
+ dst = buffer->chars.ascii;
+ if (swap) {
+ while (src < limit) *(dst++) = (*(src++) >> 24);
+ } else {
+ while (src < limit) *(dst++) = *(src++);
+ }
+ } else {
+ if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
+ if (buffer->numChars > MAX_LOCAL_UNICHARS) {
+ buffer->chars.unicode = CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
+ buffer->shouldFreeChars = true;
+ } else {
+ buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
+ }
+ }
+ CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, false,
+#if defined(__BIG_ENDIAN__)
+ !swap
+#else
+ swap
+#endif
+ );
+ }
+ } else {
+ UInt32 idx;
+ const uint8_t *chars = (const uint8_t *)bytes;
+ const uint8_t *end = chars + len;
+
+ switch (encoding) {
+ case kCFStringEncodingNonLossyASCII: {
+ UTF16Char currentValue = 0;
+ uint8_t character;
+ int8_t mode = __NSNonLossyASCIIMode;
+
+ buffer->isASCII = false;
buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
buffer->numChars = 0;
+
while (chars < end) {
- numDone = 0;
- chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
-
- if (0 == numDone) {
- if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
- buffer->isASCII = !alwaysUnicode;
- buffer->shouldFreeChars = false;
- buffer->chars.ascii = NULL;
- buffer->numChars = 0;
+ character = (*chars++);
+
+ switch (mode) {
+ case __NSNonLossyASCIIMode:
+ if (character == '\\') {
+ mode = __NSNonLossyBackslashMode;
+ } else if (character < 0x80) {
+ currentValue = character;
+ } else {
+ mode = __NSNonLossyErrorMode;
+ }
+ break;
+
+ case __NSNonLossyBackslashMode:
+ if ((character == 'U') || (character == 'u')) {
+ mode = __NSNonLossyHexInitialMode;
+ currentValue = 0;
+ } else if ((character >= '0') && (character <= '9')) {
+ mode = __NSNonLossyOctalInitialMode;
+ currentValue = character - '0';
+ } else if (character == '\\') {
+ mode = __NSNonLossyASCIIMode;
+ currentValue = character;
+ } else {
+ mode = __NSNonLossyErrorMode;
+ }
+ break;
+
+ default:
+ if (mode < __NSNonLossyHexFinalMode) {
+ if ((character >= '0') && (character <= '9')) {
+ currentValue = (currentValue << 4) | (character - '0');
+ if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
+ } else {
+ if (character >= 'a') character -= ('a' - 'A');
+ if ((character >= 'A') && (character <= 'F')) {
+ currentValue = (currentValue << 4) | ((character - 'A') + 10);
+ if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
+ } else {
+ mode = __NSNonLossyErrorMode;
+ }
+ }
+ } else {
+ if ((character >= '0') && (character <= '9')) {
+ currentValue = (currentValue << 3) | (character - '0');
+ if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
+ } else {
+ mode = __NSNonLossyErrorMode;
+ }
+ }
+ break;
+ }
+
+ if (mode == __NSNonLossyASCIIMode) {
+ buffer->chars.unicode[buffer->numChars++] = currentValue;
+ } else if (mode == __NSNonLossyErrorMode) {
return false;
}
- buffer->numChars += numDone;
}
+ return (mode == __NSNonLossyASCIIMode);
}
- return true;
-
- default:
- if (CFStringEncodingIsValidEncoding(encoding)) {
- const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
- Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
-
- if (!converter) return false;
-
- if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
- allASCII = !alwaysUnicode && isASCIISuperset;
- if (allASCII) {
- for (idx = 0; idx < len; idx++) {
- if (128 <= chars[idx]) {
- allASCII = false;
- break;
- }
+
+ case kCFStringEncodingUTF8:
+ if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
+ chars += 3;
+ len -= 3;
+ if (0 == len) return true;
+ }
+ if (buffer->isASCII) {
+ for (idx = 0; idx < len; idx++) {
+ if (128 <= chars[idx]) {
+ buffer->isASCII = false;
+ break;
+ }
+ }
+ }
+ if (buffer->isASCII) {
+ buffer->numChars = len;
+ buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
+ buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
+ memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
+ } else {
+ UInt32 numDone;
+ static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
+
+ if (!__CFFromUTF8) {
+ const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
+ __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
+ }
+
+ buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
+ buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
+ buffer->numChars = 0;
+ while (chars < end) {
+ numDone = 0;
+ chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
+
+ if (0 == numDone) {
+ if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
+ buffer->isASCII = !alwaysUnicode;
+ buffer->shouldFreeChars = false;
+ buffer->chars.ascii = NULL;
+ buffer->numChars = 0;
+ return false;
+ }
+ buffer->numChars += numDone;
+ }
+ }
+ break;
+
+ default:
+ if (CFStringEncodingIsValidEncoding(encoding)) {
+ const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
+ Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
+
+ if (!converter) return false;
+
+ if (!isASCIISuperset) buffer->isASCII = false;
+
+ if (buffer->isASCII) {
+ for (idx = 0; idx < len; idx++) {
+ if (128 <= chars[idx]) {
+ buffer->isASCII = false;
+ break;
}
}
- buffer->isASCII = allASCII;
- if (allASCII) {
+ }
+
+ if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
+ if (buffer->isASCII) {
buffer->numChars = len;
buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
return false;
}
}
- return true;
- } else {
- allASCII = !alwaysUnicode && isASCIISuperset;
- if (allASCII) {
- for (idx = 0; idx < len; idx++)
- if (128 <= chars[idx]) {
- allASCII = false;
- break;
- }
- }
- buffer->isASCII = allASCII;
- if (allASCII) {
- buffer->numChars = len;
- buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
- buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
- memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
} else {
- UInt32 guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
- static UInt32 lossyFlag = (UInt32)-1;
-
- buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
- buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
-
- if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
-
- if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) {
- if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
- buffer->isASCII = !alwaysUnicode;
- buffer->shouldFreeChars = false;
- buffer->chars.ascii = NULL;
- buffer->numChars = 0;
- return false;
+ if (buffer->isASCII) {
+ buffer->numChars = len;
+ buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
+ buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
+ memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
+ } else {
+ UInt32 guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
+ static UInt32 lossyFlag = (UInt32)-1;
+
+ buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
+ buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
+
+ if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
+
+ if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) {
+ if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
+ buffer->isASCII = !alwaysUnicode;
+ buffer->shouldFreeChars = false;
+ buffer->chars.ascii = NULL;
+ buffer->numChars = 0;
+ return false;
+ }
}
}
- return true;
+ } else {
+ return false;
}
- } else {
- return false;
}
}
+
+ return true;
}
totalBytesWritten += reqLength;
numCharsProcessed++;
}
- } else if (encoding == kCFStringEncodingUnicode) {
- CFIndex extraForBOM = generatingExternalFile ? sizeof(UniChar) : 0;
+ } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
+ CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
numCharsProcessed = rangeLen;
if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
}
totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
if (buffer) {
- if (generatingExternalFile) { /* Generate BOM */
+ if (extraForBOM) { /* Generate BOM */
#if defined(__BIG_ENDIAN__)
*buffer++ = 0xfe; *buffer++ = 0xff;
#else
#endif
}
CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
+ if (
+#if defined(__BIG_ENDIAN__)
+ kCFStringEncodingUTF16LE
+#else
+ kCFStringEncodingUTF16BE
+#endif
+ == encoding) { // Need to swap
+ UTF16Char *characters = (UTF16Char *)buffer;
+ const UTF16Char *limit = characters + numCharsProcessed;
+
+ while (characters < limit) {
+ *characters = CFSwapInt16(*characters);
+ ++characters;
+ }
+ }
}
+ } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
+ UTF32Char character;
+ CFStringInlineBuffer buf;
+ UTF32Char *characters = (UTF32Char *)buffer;
+
+#if defined(__BIG_ENDIAN__)
+ bool swap = (encoding == kCFStringEncodingUTF32LE ? true : false);
+#else
+ bool swap = (encoding == kCFStringEncodingUTF32BE ? true : false);
+#endif
+
+ if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
+ totalBytesWritten += sizeof(UTF32Char);
+ if (characters) {
+ if (totalBytesWritten > max) { // insufficient buffer
+ totalBytesWritten = 0;
+ } else {
+#if defined(__BIG_ENDIAN__)
+ *(characters++) = 0x0000FEFF;
+#else
+ *(characters++) = 0xFFFE0000;
+#endif
+ }
+ }
+ }
+
+ CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
+ while (numCharsProcessed < rangeLen) {
+ character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
+
+ if (CFUniCharIsSurrogateHighCharacter(character)) {
+ UTF16Char otherCharacter;
+
+ if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
+ character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
+ } else if (lossByte) {
+ character = lossByte;
+ } else {
+ break;
+ }
+ } else if (CFUniCharIsSurrogateLowCharacter(character)) {
+ if (lossByte) {
+ character = lossByte;
+ } else {
+ break;
+ }
+ }
+
+ totalBytesWritten += sizeof(UTF32Char);
+
+ if (characters) {
+ if (totalBytesWritten > max) {
+ totalBytesWritten -= sizeof(UTF32Char);
+ break;
+ }
+ *(characters++) = (swap ? CFSwapInt32(character) : character);
+ }
+
+ numCharsProcessed += (character > 0xFFFF ? 2 : 1);
+ }
} else {
CFIndex numChars;
UInt32 flags;
const unsigned char *cString = NULL;
+ BOOL isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
- if (!CF_IS_OBJC(CFStringGetTypeID(), string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // Checking for NSString to avoid infinite recursion
+ if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
const unsigned char *ptr;
if ((cString = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
ptr = (cString += rangeLoc);
if (!buffer) max = 0;
// Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
- flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | __CFGetASCIICompatibleFlag();
+ // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
+ flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
if (!cString && (cString = (const char*)CFStringGetCharactersPtr(string))) { // Must be Unicode string
if (CFStringEncodingIsValidEncoding(encoding)) { // Converter available in CF
return numCharsProcessed;
}
-#define MAX_STACK_BUFFER_LEN (255)
-CF_EXPORT Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
+CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
+ return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
+}
+
+CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
+ CFIndex len = CFStringGetLength(string);
+ CFStringEncoding enc = CFStringGetFastestEncoding(string);
+ switch (enc) {
+ case kCFStringEncodingASCII:
+ case kCFStringEncodingMacRoman:
+ return len * 3 + 1;
+ default:
+ return len * 9 + 1;
+ }
+}
+
+Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
#if defined(__MACH__)
+#define MAX_STACK_BUFFER_LEN (255)
const UTF16Char *characters = CFStringGetCharactersPtr(string);
uint32_t usedBufLen;
range.length = (length < MAX_STACK_BUFFER_LEN ? length : MAX_STACK_BUFFER_LEN);
}
} else {
- UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN]; // C99 Variable array
+ UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
CFStringGetCharacters(string, CFRangeMake(0, length), charactersBuffer);
if (!CFUniCharDecompose(charactersBuffer, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
#endif __MACH__
}
+
+Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
+ return CFStringGetFileSystemRepresentation(string, buffer, maxBufLen);
+}
+