--- /dev/null
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*
+ * CFICUConverters.c
+ * CoreFoundation
+ *
+ * Created by Aki Inoue on 07/12/04.
+ * Copyright 2007-2009, Apple Inc. All rights reserved.
+ *
+ */
+
+#include "CFStringEncodingDatabase.h"
+#include "CFStringEncodingConverterPriv.h"
+#include "CFICUConverters.h"
+#include <CoreFoundation/CFStringEncodingExt.h>
+#include <unicode/ucnv.h>
+#include <unicode/uversion.h>
+#include "CFInternal.h"
+#include <stdio.h>
+
+#if DEPLOYMENT_TARGET_WINDOWS // Windows builds: alias the names used below to their underscore-prefixed equivalents
+#define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c) // the fourth (locale) argument is dropped
+#define snprintf _snprintf // every snprintf call in this file becomes _snprintf
+#endif
+
+// Per-thread table of ICU converters that are kept open between calls of a streaming conversion
+typedef struct {
+ uint8_t _numSlots; // number of entries currently allocated in _converters
+ uint8_t _nextSlot; // index tried first when a converter needs a new slot
+ UConverter **_converters; // slot table; a NULL entry is a free slot, and stream ID N refers to index N - 1
+} __CFICUThreadData;
+
+/*
+ * Destructor for a thread's ICU converter table (__CFICUThreadData).
+ *
+ * Closes every converter that is still parked in the slot table, then frees
+ * the table and the record itself. It is registered as the thread-specific
+ * data destructor on the pthread path and is invoked through
+ * __CFStringEncodingICUThreadDataCleaner() on the Windows path.
+ *
+ * context: the __CFICUThreadData pointer to tear down; NULL is ignored.
+ */
+static void __CFICUThreadDataDestructor(void *context) {
+    __CFICUThreadData *data = (__CFICUThreadData *)context;
+
+    if (NULL == data) return;
+
+    if (NULL != data->_converters) {
+        UConverter **converter = data->_converters;
+        UConverter **limit = converter + data->_numSlots;
+
+        while (converter < limit) {
+            // Test the slot's contents; the slot's own address is never NULL.
+            if (NULL != *converter) ucnv_close(*converter);
+            ++converter;
+        }
+        CFAllocatorDeallocate(NULL, data->_converters);
+    }
+
+    CFAllocatorDeallocate(NULL, data);
+}
+
+#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
+#import <pthread.h>
+
+/*
+ * Returns the calling thread's ICU converter table, creating a zero-filled
+ * one and storing it under __CFTSDKeyICUConverter on first use.
+ * __CFICUThreadDataDestructor is registered as the key's destructor.
+ */
+CF_INLINE __CFICUThreadData *__CFStringEncodingICUGetThreadData() {
+    __CFICUThreadData *threadData;
+
+    pthread_key_init_np(__CFTSDKeyICUConverter, __CFICUThreadDataDestructor);
+
+    threadData = (__CFICUThreadData *)pthread_getspecific(__CFTSDKeyICUConverter);
+    if (NULL != threadData) return threadData;
+
+    // First request on this thread: build an empty record and remember it.
+    threadData = (__CFICUThreadData *)CFAllocatorAllocate(NULL, sizeof(__CFICUThreadData), 0);
+    memset(threadData, 0, sizeof(__CFICUThreadData));
+    pthread_setspecific(__CFTSDKeyICUConverter, (const void *)threadData);
+
+    return threadData;
+}
+#elif DEPLOYMENT_TARGET_WINDOWS
+// Non-static entry point that forwards to the file-local thread data destructor.
+__private_extern__ void __CFStringEncodingICUThreadDataCleaner(void *context) {
+    __CFICUThreadDataDestructor(context);
+}
+
+/*
+ * Returns the calling thread's ICU converter table, which lives in the
+ * _icuThreadData field of the CF thread-specific data record. A zero-filled
+ * table is created on first use.
+ */
+CF_INLINE __CFICUThreadData *__CFStringEncodingICUGetThreadData() {
+    __CFThreadSpecificData *tsd = __CFGetThreadSpecificData_inline();
+    __CFICUThreadData *icuData = (__CFICUThreadData *)tsd->_icuThreadData;
+
+    if (NULL == icuData) {
+        icuData = (__CFICUThreadData *)CFAllocatorAllocate(NULL, sizeof(__CFICUThreadData), 0);
+        memset(icuData, 0, sizeof(__CFICUThreadData));
+        tsd->_icuThreadData = icuData;
+    }
+
+    return icuData;
+}
+#else
+#error Need implementation for thread data
+#endif
+
+/*
+ * Returns the ICU converter name for a CFStringEncoding, or NULL when no
+ * alias is found.
+ *
+ * Lookup order:
+ *   1. kCFStringEncodingUTF7_IMAP maps to the fixed name "IMAP-mailbox-name".
+ *   2. For non-Unicode encodings with a Windows code page, the alias
+ *      "windows-<codepage>" is tried.
+ *   3. Otherwise the encoding's canonical name is looked up as an alias.
+ *
+ * The returned string comes from ucnv_getAlias() (or is a literal); this
+ * function does not allocate it.
+ */
+__private_extern__ const char *__CFStringEncodingGetICUName(CFStringEncoding encoding) {
+#define STACK_BUFFER_SIZE (60)
+    char buffer[STACK_BUFFER_SIZE];
+    const char *result = NULL;
+    UErrorCode errorCode = U_ZERO_ERROR;
+    uint32_t codepage = 0;
+
+    if (kCFStringEncodingUTF7_IMAP == encoding) return "IMAP-mailbox-name";
+
+    // we don't use codepage for UTF to avoid little endian weirdness of Windows
+    if (kCFStringEncodingUnicode != (encoding & 0x0F00)) codepage = __CFStringEncodingGetWindowsCodePage(encoding);
+
+    if (0 != codepage) {
+        // codepage is unsigned, so format it with %u rather than %d.
+        int length = snprintf(buffer, STACK_BUFFER_SIZE, "windows-%u", (unsigned int)codepage);
+
+        // A negative result (error, or truncation with _snprintf) must not be treated as success.
+        if ((length > 0) && (length < STACK_BUFFER_SIZE)) {
+            result = ucnv_getAlias(buffer, 0, &errorCode);
+            if (NULL != result) return result;
+        }
+    }
+
+    // ICU calls return immediately when handed a failure code, so start the second lookup clean.
+    errorCode = U_ZERO_ERROR;
+
+    if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) result = ucnv_getAlias(buffer, 0, &errorCode);
+
+    return result;
+#undef STACK_BUFFER_SIZE
+}
+
+/*
+ * Maps an ICU converter name to a CFStringEncoding.
+ *
+ * A name of the form "windows-<n>" with a non-zero <n> is resolved through
+ * the Windows code page table. Otherwise, if ICU knows the name, its
+ * standard names under "WINDOWS", "JAVA" and "MIME" are tried in that order:
+ * a WINDOWS name of the form "windows-<n>" is resolved by code page, and any
+ * standard name that differs from icuName is looked up as a canonical name.
+ *
+ * Returns kCFStringEncodingInvalidId when nothing matches.
+ */
+__private_extern__ CFStringEncoding __CFStringEncodingGetFromICUName(const char *icuName) {
+    static const char * const standards[] = { "WINDOWS", "JAVA", "MIME" };
+    const char * const windowsPrefix = "windows-";
+    const size_t prefixLength = strlen(windowsPrefix);
+    UErrorCode errorCode = U_ZERO_ERROR;
+    uint32_t codepage;
+    size_t standardIndex;
+
+    if (0 == strncasecmp_l(icuName, windowsPrefix, prefixLength, NULL)) {
+        codepage = strtol(icuName + prefixLength, NULL, 10);
+        if (0 != codepage) return __CFStringEncodingGetFromWindowsCodePage(codepage);
+    }
+
+    // Names ICU has no aliases for cannot be resolved any further.
+    if (0 == ucnv_countAliases(icuName, &errorCode)) return kCFStringEncodingInvalidId;
+
+    for (standardIndex = 0; standardIndex < (sizeof(standards) / sizeof(standards[0])); standardIndex++) {
+        const char *name = ucnv_getStandardName(icuName, standards[standardIndex], &errorCode);
+        CFStringEncoding encoding;
+
+        if (NULL == name) continue;
+
+        // Only the WINDOWS standard name gets the code page shortcut.
+        if ((0 == standardIndex) && (0 == strncasecmp_l(name, windowsPrefix, prefixLength, NULL))) {
+            codepage = strtol(name + prefixLength, NULL, 10);
+            if (0 != codepage) return __CFStringEncodingGetFromWindowsCodePage(codepage);
+        }
+
+        // A standard name that matches icuName (over the standard name's length) is skipped.
+        if (0 == strncasecmp_l(icuName, name, strlen(name), NULL)) continue;
+
+        encoding = __CFStringEncodingGetFromCanonicalName(name);
+        if (kCFStringEncodingInvalidId != encoding) return encoding;
+    }
+
+    return kCFStringEncodingInvalidId;
+}
+
+/*
+ * Obtains the ICU converter to use for one conversion call.
+ *
+ * If flags carries a stream ID whose slot in the per-thread table holds a
+ * converter, that converter is returned as-is. Otherwise a new converter is
+ * opened for icuName and configured:
+ *   - with a lossy byte (from flags, or '?' when
+ *     kCFStringEncodingAllowLossyConversion is set) it becomes the
+ *     substitution character;
+ *   - without one, the STOP callback is installed for the direction selected
+ *     by toUnicode.
+ *
+ * Returns NULL when ucnv_open() fails.
+ */
+CF_INLINE UConverter *__CFStringEncodingConverterCreateICUConverter(const char *icuName, uint32_t flags, bool toUnicode) {
+    const uint8_t streamID = CFStringEncodingStreamIDFromMask(flags);
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UConverter *converter;
+    char lossyByte;
+
+    if (0 != streamID) { // continuation of a stream started by an earlier call
+        __CFICUThreadData *data = __CFStringEncodingICUGetThreadData();
+        const uint8_t slot = (uint8_t)(streamID - 1); // stream IDs are 1-based
+
+        if (slot < data->_numSlots) {
+            UConverter *parked = data->_converters[slot];
+
+            if (NULL != parked) return parked;
+        }
+    }
+
+    converter = ucnv_open(icuName, &errorCode);
+    if (NULL == converter) return NULL;
+
+    lossyByte = CFStringEncodingMaskToLossyByte(flags);
+    if ((0 == lossyByte) && (0 != (flags & kCFStringEncodingAllowLossyConversion))) lossyByte = '?';
+
+    if (0 != lossyByte) {
+        ucnv_setSubstChars(converter, &lossyByte, 1, &errorCode);
+    } else if (toUnicode) {
+        ucnv_setToUCallBack(converter, &UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
+    } else {
+        ucnv_setFromUCallBack(converter, &UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
+    }
+
+    return converter;
+}
+
+#define ICU_CONVERTER_SLOT_INCREMENT (10) // slots added whenever the per-thread table is created or grown
+#define ICU_CONVERTER_MAX_SLOT (255) // the table is never grown beyond this many slots
+/*
+ * Finishes one conversion call by either parking `converter` in the calling
+ * thread's slot table (so a later call can continue the stream) or closing it.
+ *
+ * The converter is kept when status is not kCFStringEncodingInvalidInputStream
+ * and either kCFStringEncodingPartialInput is set in flags, or status is
+ * kCFStringEncodingInsufficientOutputBufferLength with
+ * kCFStringEncodingPartialOutput set. In that case the return value is
+ * CFStringEncodingStreamIDToMask(streamID), where a stream ID is the slot
+ * index plus one. If the table would have to grow past
+ * ICU_CONVERTER_MAX_SLOT, an error is logged, the converter is closed and 0
+ * is returned.
+ *
+ * In every other case the converter's slot (if any) is cleared, the converter
+ * is closed and 0 is returned.
+ */
+static CFIndex __CFStringEncodingConverterReleaseICUConverter(UConverter *converter, uint32_t flags, CFIndex status) {
+ uint8_t streamID = CFStringEncodingStreamIDFromMask(flags);
+ // Keep-alive path: the caller signalled more input, or asked to resume after a full output buffer.
+ if ((kCFStringEncodingInvalidInputStream != status) && ((0 != (flags & kCFStringEncodingPartialInput)) || ((kCFStringEncodingInsufficientOutputBufferLength == status) && (0 != (flags & kCFStringEncodingPartialOutput))))) {
+ if (0 == streamID) { // no stream ID yet, so a slot must be assigned
+ __CFICUThreadData *data = __CFStringEncodingICUGetThreadData();
+
+ if (NULL == data->_converters) { // first streaming conversion on this thread: create the table
+ data->_converters = (UConverter **)CFAllocatorAllocate(NULL, sizeof(UConverter *) * ICU_CONVERTER_SLOT_INCREMENT, 0);
+ memset(data->_converters, 0, sizeof(UConverter *) * ICU_CONVERTER_SLOT_INCREMENT);
+ data->_numSlots = ICU_CONVERTER_SLOT_INCREMENT;
+ data->_nextSlot = 0;
+ } else if ((data->_nextSlot >= data->_numSlots) || (NULL != data->_converters[data->_nextSlot])) { // _nextSlot is not usable; scan for a free slot
+ CFIndex index;
+
+ for (index = 0;index < data->_numSlots;index++) {
+ if (NULL == data->_converters[index]) {
+ data->_nextSlot = index;
+ break;
+ }
+ }
+
+ if (index >= data->_numSlots) { // no free slot was found; grow the table
+ UConverter **newConverters;
+ CFIndex newSize = data->_numSlots + ICU_CONVERTER_SLOT_INCREMENT;
+
+ if (newSize > ICU_CONVERTER_MAX_SLOT) { // growth limit reached: give up on streaming for this converter
+ CFLog(kCFLogLevelError, CFSTR("Per-thread streaming ID for ICU converters exhausted. Ignoring..."));
+ ucnv_close(converter);
+ return 0;
+ }
+ // Allocate the larger table, copy the existing entries over, and free the old one.
+ newConverters = (UConverter **)CFAllocatorAllocate(NULL, sizeof(UConverter *) * newSize, 0);
+ memset(newConverters, 0, sizeof(UConverter *) * newSize);
+ memcpy(newConverters, data->_converters, sizeof(UConverter *) * data->_numSlots);
+ CFAllocatorDeallocate(NULL, data->_converters);
+ data->_converters = newConverters;
+ data->_nextSlot = data->_numSlots; // first of the newly added slots
+ data->_numSlots = newSize;
+ }
+ }
+
+ data->_converters[data->_nextSlot] = converter;
+ streamID = data->_nextSlot + 1; // stream IDs are 1-based; 0 means "no stream"
+
+ // Advance _nextSlot, wrapping to the first free slot when the following one is taken or out of range
+ ++data->_nextSlot;
+
+ if ((data->_nextSlot >= data->_numSlots) || (NULL != data->_converters[data->_nextSlot])) {
+ data->_nextSlot = 0;
+
+ while ((data->_nextSlot < data->_numSlots) && (NULL != data->_converters[data->_nextSlot])) ++data->_nextSlot;
+ }
+ }
+ // NOTE(review): with a nonzero incoming stream ID nothing is stored here; presumably the converter is already the one parked in that slot - confirm.
+ return CFStringEncodingStreamIDToMask(streamID);
+ }
+ // Release path: the stream (if any) is finished or failed.
+ if (0 != streamID) {
+ __CFICUThreadData *data = __CFStringEncodingICUGetThreadData();
+
+ --streamID; // map to array index
+
+ if ((streamID < data->_numSlots) && (converter == data->_converters[streamID])) {
+ data->_converters[streamID] = NULL; // free the slot
+ if (data->_nextSlot > streamID) data->_nextSlot = streamID; // move the hint down to the freed slot
+ }
+ }
+
+ ucnv_close(converter);
+
+ return 0;
+}
+
+#define MAX_BUFFER_SIZE (1000) // element count of the stack scratch buffers used by the length-only conversion paths
+
+#if (U_ICU_VERSION_MAJOR_NUM > 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM > 0)) // true for ICU versions newer than 4.0
+#warning Unknown ICU version. Check binary compatibility issues for rdar://problem/6024743
+#endif
+#define HAS_ICU_BUG_6024743 (1) // enables rewinding the source pointer after invalid input in both converters
+#define HAS_ICU_BUG_6025527 (1) // enables trimming a trailing '\0' from the ucnv_getInvalidChars() result
+/*
+ * Converts UTF-16 text to bytes with the ICU converter named icuName.
+ *
+ * flags        conversion flags; consulted here for kCFStringEncodingPartialInput
+ *              (disables flushing) and passed on to the converter create/release helpers.
+ * characters   UTF-16 input, numChars units long.
+ * usedCharLen  optional out: number of input units consumed.
+ * bytes        output buffer of maxByteLen bytes. When maxByteLen is 0 the
+ *              output is converted into a stack scratch buffer and only counted.
+ * usedByteLen  optional out: number of bytes produced (or counted).
+ *
+ * Returns kCFStringEncodingConverterUnavailable when no converter can be
+ * obtained; otherwise a conversion status OR-ed with the value returned by
+ * __CFStringEncodingConverterReleaseICUConverter().
+ */
+__private_extern__ CFIndex __CFStringEncodingICUToBytes(const char *icuName, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
+ UConverter *converter;
+ UErrorCode errorCode = U_ZERO_ERROR;
+ const UTF16Char *source = characters;
+ const UTF16Char *sourceLimit = source + numChars;
+ char *destination = (char *)bytes;
+ const char *destinationLimit = destination + maxByteLen;
+ bool flush = ((0 == (flags & kCFStringEncodingPartialInput)) ? true : false); // flush unless the caller says more input follows
+ CFIndex status;
+
+ if (NULL == (converter = __CFStringEncodingConverterCreateICUConverter(icuName, flags, false))) return kCFStringEncodingConverterUnavailable;
+
+ if (0 == maxByteLen) { // length-only mode: nothing is written to bytes
+ char buffer[MAX_BUFFER_SIZE];
+ CFIndex totalLength = 0;
+
+ while ((source < sourceLimit) && (U_ZERO_ERROR == errorCode)) {
+ destination = buffer; // reuse the scratch buffer from its start on every pass
+ destinationLimit = destination + MAX_BUFFER_SIZE;
+
+ ucnv_fromUnicode(converter, &destination, destinationLimit, (const UChar **)&source, (const UChar *)sourceLimit, NULL, flush, &errorCode);
+
+ totalLength += (destination - buffer);
+ // A full scratch buffer is expected here; clear the overflow so the loop keeps going.
+ if (U_BUFFER_OVERFLOW_ERROR == errorCode) errorCode = U_ZERO_ERROR;
+ }
+
+ if (NULL != usedByteLen) *usedByteLen = totalLength;
+ } else {
+ ucnv_fromUnicode(converter, &destination, destinationLimit, (const UChar **)&source, (const UChar *)sourceLimit, NULL, flush, &errorCode);
+
+ if (NULL != usedByteLen) *usedByteLen = destination - (const char *)bytes;
+ }
+ // Map the ICU error code to a status: zero -> success, overflow -> insufficient buffer, anything else -> invalid input.
+ status = ((U_ZERO_ERROR == errorCode) ? kCFStringEncodingConversionSuccess : ((U_BUFFER_OVERFLOW_ERROR == errorCode) ? kCFStringEncodingInsufficientOutputBufferLength : kCFStringEncodingInvalidInputStream));
+
+ if (NULL != usedCharLen) {
+#if HAS_ICU_BUG_6024743
+/* ICU has a serious behavioral inconsistency issue that the source pointer returned from ucnv_fromUnicode() is after illegal input. We have to keep track of any changes in this area in order to prevent future binary compatibility issues */
+ if (kCFStringEncodingInvalidInputStream == status) {
+#define MAX_ERROR_BUFFER_LEN (32)
+ UTF16Char errorBuffer[MAX_ERROR_BUFFER_LEN];
+ int8_t errorLength = MAX_ERROR_BUFFER_LEN; // in: buffer capacity; out: length reported by ICU
+#undef MAX_ERROR_BUFFER_LEN
+
+ errorCode = U_ZERO_ERROR; // reset so the query below is not rejected because of the conversion error
+
+ ucnv_getInvalidUChars(converter, (UChar *)errorBuffer, &errorLength, &errorCode);
+
+ if (U_ZERO_ERROR == errorCode) {
+ source -= errorLength; // step back over the invalid units so they are not counted as consumed
+ } else {
+ // Gah, something is terribly wrong. Reset everything
+ source = characters; // 0 length
+ if (NULL != usedByteLen) *usedByteLen = 0;
+ }
+ }
+#endif
+ *usedCharLen = source - characters;
+ }
+ // Park or close the converter; a parked converter contributes its stream ID mask to the result.
+ status |= __CFStringEncodingConverterReleaseICUConverter(converter, flags, status);
+
+ return status;
+}
+/*
+ * Converts bytes to UTF-16 with the ICU converter named icuName.
+ *
+ * flags        conversion flags; consulted here for kCFStringEncodingPartialInput
+ *              (disables flushing) and passed on to the converter create/release helpers.
+ * bytes        input, numBytes long.
+ * usedByteLen  optional out: number of input bytes consumed.
+ * characters   output buffer of maxCharLen UTF-16 units. When maxCharLen is 0
+ *              the output is converted into a stack scratch buffer and only counted.
+ * usedCharLen  optional out: number of UTF-16 units produced (or counted).
+ *
+ * Returns kCFStringEncodingConverterUnavailable when no converter can be
+ * obtained; otherwise a conversion status OR-ed with the value returned by
+ * __CFStringEncodingConverterReleaseICUConverter().
+ */
+__private_extern__ CFIndex __CFStringEncodingICUToUnicode(const char *icuName, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
+ UConverter *converter;
+ UErrorCode errorCode = U_ZERO_ERROR;
+ const char *source = (const char *)bytes;
+ const char *sourceLimit = source + numBytes;
+ UTF16Char *destination = characters;
+ const UTF16Char *destinationLimit = destination + maxCharLen;
+ bool flush = ((0 == (flags & kCFStringEncodingPartialInput)) ? true : false); // flush unless the caller says more input follows
+ CFIndex status;
+
+ if (NULL == (converter = __CFStringEncodingConverterCreateICUConverter(icuName, flags, true))) return kCFStringEncodingConverterUnavailable;
+
+ if (0 == maxCharLen) { // length-only mode: nothing is written to characters
+ UTF16Char buffer[MAX_BUFFER_SIZE];
+ CFIndex totalLength = 0;
+
+ while ((source < sourceLimit) && (U_ZERO_ERROR == errorCode)) {
+ destination = buffer; // reuse the scratch buffer from its start on every pass
+ destinationLimit = destination + MAX_BUFFER_SIZE;
+
+ ucnv_toUnicode(converter, (UChar **)&destination, (const UChar *)destinationLimit, &source, sourceLimit, NULL, flush, &errorCode);
+
+ totalLength += (destination - buffer);
+ // A full scratch buffer is expected here; clear the overflow so the loop keeps going.
+ if (U_BUFFER_OVERFLOW_ERROR == errorCode) errorCode = U_ZERO_ERROR;
+ }
+
+ if (NULL != usedCharLen) *usedCharLen = totalLength;
+ } else {
+ ucnv_toUnicode(converter, (UChar **)&destination, (const UChar *)destinationLimit, &source, sourceLimit, NULL, flush, &errorCode);
+
+ if (NULL != usedCharLen) *usedCharLen = destination - characters;
+ }
+ // Map the ICU error code to a status: zero -> success, overflow -> insufficient buffer, anything else -> invalid input.
+ status = ((U_ZERO_ERROR == errorCode) ? kCFStringEncodingConversionSuccess : ((U_BUFFER_OVERFLOW_ERROR == errorCode) ? kCFStringEncodingInsufficientOutputBufferLength : kCFStringEncodingInvalidInputStream));
+
+ if (NULL != usedByteLen) {
+#if HAS_ICU_BUG_6024743
+ /* ICU has a serious behavioral inconsistency issue that the source pointer returned from ucnv_toUnicode() is after illegal input. We have to keep track of any changes in this area in order to prevent future binary compatibility issues */
+ if (kCFStringEncodingInvalidInputStream == status) {
+#define MAX_ERROR_BUFFER_LEN (32)
+ char errorBuffer[MAX_ERROR_BUFFER_LEN];
+ int8_t errorLength = MAX_ERROR_BUFFER_LEN; // in: buffer capacity; out: length reported by ICU
+#undef MAX_ERROR_BUFFER_LEN
+
+ errorCode = U_ZERO_ERROR; // reset so the query below is not rejected because of the conversion error
+
+ ucnv_getInvalidChars(converter, errorBuffer, &errorLength, &errorCode);
+
+ if (U_ZERO_ERROR == errorCode) {
+#if HAS_ICU_BUG_6025527
+ // Another ICU oddness here. ucnv_getInvalidChars() writes the '\0' terminator, and errorLength includes the extra byte.
+ if ((errorLength > 0) && ('\0' == errorBuffer[errorLength - 1])) --errorLength;
+#endif
+ source -= errorLength; // step back over the invalid bytes so they are not counted as consumed
+ } else {
+ // Gah, something is terribly wrong. Reset everything
+ source = (const char *)bytes; // 0 length
+ if (NULL != usedCharLen) *usedCharLen = 0;
+ }
+ }
+#endif
+
+ *usedByteLen = source - (const char *)bytes;
+ }
+ // Park or close the converter; a parked converter contributes its stream ID mask to the result.
+ status |= __CFStringEncodingConverterReleaseICUConverter(converter, flags, status);
+
+ return status;
+}
+
+/*
+ * Returns how many UTF-16 units the given bytes convert to, or 0 when the
+ * conversion result is anything other than kCFStringEncodingConversionSuccess.
+ * Runs __CFStringEncodingICUToUnicode() in its length-only mode (no output buffer).
+ */
+__private_extern__ CFIndex __CFStringEncodingICUCharLength(const char *icuName, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
+    CFIndex charCount;
+    CFIndex result = __CFStringEncodingICUToUnicode(icuName, flags, bytes, numBytes, NULL, NULL, 0, &charCount);
+
+    if (kCFStringEncodingConversionSuccess != result) return 0;
+
+    return charCount;
+}
+
+/*
+ * Returns how many bytes the given UTF-16 text converts to, or 0 when the
+ * conversion result is anything other than kCFStringEncodingConversionSuccess.
+ * Runs __CFStringEncodingICUToBytes() in its length-only mode (no output buffer).
+ */
+__private_extern__ CFIndex __CFStringEncodingICUByteLength(const char *icuName, uint32_t flags, const UniChar *characters, CFIndex numChars) {
+    CFIndex byteCount;
+    CFIndex result = __CFStringEncodingICUToBytes(icuName, flags, characters, numChars, NULL, NULL, 0, &byteCount);
+
+    if (kCFStringEncodingConversionSuccess != result) return 0;
+
+    return byteCount;
+}
+
+/*
+ * Builds an array of the CFStringEncodings that correspond to the converters
+ * ICU reports as available.
+ *
+ * allocator      accepted for interface compatibility. The array has always
+ *                been allocated from the default allocator (NULL), so it is
+ *                now also released with the default allocator instead of
+ *                being freed with a different allocator than it came from.
+ * numberOfIndex  out: number of entries in the returned array; set to 0
+ *                whenever NULL is returned. May be NULL.
+ *
+ * Returns the array (owned by the caller, from the default allocator), or
+ * NULL when ICU lists no converters, none of them maps to a CFStringEncoding,
+ * or the allocation fails.
+ */
+__private_extern__ CFStringEncoding *__CFStringEncodingCreateICUEncodings(CFAllocatorRef allocator, CFIndex *numberOfIndex) {
+    CFIndex count = ucnv_countAvailable();
+    CFIndex numEncodings = 0;
+    CFStringEncoding *encodings;
+    CFStringEncoding encoding;
+    CFIndex index;
+
+    (void)allocator; // see the header comment: allocation stays on the default allocator
+
+    // Give the caller a defined count on every early return.
+    if (NULL != numberOfIndex) *numberOfIndex = 0;
+
+    if (count <= 0) return NULL;
+
+    encodings = (CFStringEncoding *)CFAllocatorAllocate(NULL, sizeof(CFStringEncoding) * count, 0);
+    if (NULL == encodings) return NULL;
+
+    for (index = 0; index < count; index++) {
+        encoding = __CFStringEncodingGetFromICUName(ucnv_getAvailableName(index));
+
+        if (kCFStringEncodingInvalidId != encoding) encodings[numEncodings++] = encoding;
+    }
+
+    if (0 == numEncodings) {
+        // Release with the same allocator that produced the buffer.
+        CFAllocatorDeallocate(NULL, encodings);
+        encodings = NULL;
+    }
+
+    if (NULL != numberOfIndex) *numberOfIndex = numEncodings;
+
+    return encodings;
+}