/*
 * Copyright (c) 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * Created by Aki Inoue on 07/12/04.
 * Copyright 2007-2009, Apple Inc. All rights reserved.
 */
#include "CFStringEncodingDatabase.h"
#include "CFStringEncodingConverterPriv.h"
#include "CFICUConverters.h"
#include <CoreFoundation/CFStringEncodingExt.h>
#include <unicode/ucnv.h>
#include <unicode/uversion.h>
#include "CFInternal.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if DEPLOYMENT_TARGET_WINDOWS
/*
 * Windows build only: redirect the two calls this file uses to the
 * underscore-prefixed spellings. The fourth (locale) argument of
 * strncasecmp_l is dropped by the macro.
 */
#define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c)
#define snprintf _snprintf
#endif
46 // Thread data support
50 UConverter
**_converters
;
53 static void __CFICUThreadDataDestructor(void *context
) {
54 __CFICUThreadData
* data
= (__CFICUThreadData
*)context
;
56 if (NULL
!= data
->_converters
) { // scan to make sure deallocation
57 UConverter
**converter
= data
->_converters
;
58 UConverter
**limit
= converter
+ data
->_numSlots
;
60 while (converter
< limit
) {
61 if (NULL
!= converter
) ucnv_close(*converter
);
64 CFAllocatorDeallocate(NULL
, data
->_converters
);
67 CFAllocatorDeallocate(NULL
, data
);
70 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
73 CF_INLINE __CFICUThreadData
*__CFStringEncodingICUGetThreadData() {
74 __CFICUThreadData
* data
;
76 pthread_key_init_np(__CFTSDKeyICUConverter
, __CFICUThreadDataDestructor
);
77 data
= (__CFICUThreadData
*)pthread_getspecific(__CFTSDKeyICUConverter
);
80 data
= (__CFICUThreadData
*)CFAllocatorAllocate(NULL
, sizeof(__CFICUThreadData
), 0);
81 memset(data
, 0, sizeof(__CFICUThreadData
));
82 pthread_setspecific(__CFTSDKeyICUConverter
, (const void *)data
);
87 #elif DEPLOYMENT_TARGET_WINDOWS
88 __private_extern__
void __CFStringEncodingICUThreadDataCleaner(void *context
) { __CFICUThreadDataDestructor(context
); }
90 CF_INLINE __CFICUThreadData
*__CFStringEncodingICUGetThreadData() {
91 __CFThreadSpecificData
*threadData
= __CFGetThreadSpecificData_inline();
93 if (NULL
== threadData
->_icuThreadData
) {
94 threadData
->_icuThreadData
= (__CFICUThreadData
*)CFAllocatorAllocate(NULL
, sizeof(__CFICUThreadData
), 0);
95 memset(threadData
->_icuThreadData
, 0, sizeof(__CFICUThreadData
));
98 return (__CFICUThreadData
*)threadData
->_icuThreadData
;
101 #error Need implementation for thread data
104 __private_extern__
const char *__CFStringEncodingGetICUName(CFStringEncoding encoding
) {
105 #define STACK_BUFFER_SIZE (60)
106 char buffer
[STACK_BUFFER_SIZE
];
107 const char *result
= NULL
;
108 UErrorCode errorCode
= U_ZERO_ERROR
;
109 uint32_t codepage
= 0;
111 if (kCFStringEncodingUTF7_IMAP
== encoding
) return "IMAP-mailbox-name";
113 if (kCFStringEncodingUnicode
!= (encoding
& 0x0F00)) codepage
= __CFStringEncodingGetWindowsCodePage(encoding
); // we don't use codepage for UTF to avoid little endian weirdness of Windows
115 if ((0 != codepage
) && (snprintf(buffer
, STACK_BUFFER_SIZE
, "windows-%d", codepage
) < STACK_BUFFER_SIZE
) && (NULL
!= (result
= ucnv_getAlias(buffer
, 0, &errorCode
)))) return result
;
117 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) result
= ucnv_getAlias(buffer
, 0, &errorCode
);
120 #undef STACK_BUFFER_SIZE
123 __private_extern__ CFStringEncoding
__CFStringEncodingGetFromICUName(const char *icuName
) {
125 UErrorCode errorCode
= U_ZERO_ERROR
;
127 if ((0 == strncasecmp_l(icuName
, "windows-", strlen("windows-"), NULL
)) && (0 != (codepage
= strtol(icuName
+ strlen("windows-"), NULL
, 10)))) return __CFStringEncodingGetFromWindowsCodePage(codepage
);
129 if (0 != ucnv_countAliases(icuName
, &errorCode
)) {
130 CFStringEncoding encoding
;
133 // Try WINDOWS platform
134 name
= ucnv_getStandardName(icuName
, "WINDOWS", &errorCode
);
137 if ((0 == strncasecmp_l(name
, "windows-", strlen("windows-"), NULL
)) && (0 != (codepage
= strtol(name
+ strlen("windows-"), NULL
, 10)))) return __CFStringEncodingGetFromWindowsCodePage(codepage
);
139 if (strncasecmp_l(icuName
, name
, strlen(name
), NULL
) && (kCFStringEncodingInvalidId
!= (encoding
= __CFStringEncodingGetFromCanonicalName(name
)))) return encoding
;
143 name
= ucnv_getStandardName(icuName
, "JAVA", &errorCode
);
144 if ((NULL
!= name
) && strncasecmp_l(icuName
, name
, strlen(name
), NULL
) && (kCFStringEncodingInvalidId
!= (encoding
= __CFStringEncodingGetFromCanonicalName(name
)))) return encoding
;
147 name
= ucnv_getStandardName(icuName
, "MIME", &errorCode
);
148 if ((NULL
!= name
) && strncasecmp_l(icuName
, name
, strlen(name
), NULL
) && (kCFStringEncodingInvalidId
!= (encoding
= __CFStringEncodingGetFromCanonicalName(name
)))) return encoding
;
151 return kCFStringEncodingInvalidId
;
154 CF_INLINE UConverter
*__CFStringEncodingConverterCreateICUConverter(const char *icuName
, uint32_t flags
, bool toUnicode
) {
155 UConverter
*converter
;
156 UErrorCode errorCode
= U_ZERO_ERROR
;
157 uint8_t streamID
= CFStringEncodingStreamIDFromMask(flags
);
159 if (0 != streamID
) { // this is a part of streaming previously created
160 __CFICUThreadData
*data
= __CFStringEncodingICUGetThreadData();
162 --streamID
; // map to array index
164 if ((streamID
< data
->_numSlots
) && (NULL
!= data
->_converters
[streamID
])) return data
->_converters
[streamID
];
167 converter
= ucnv_open(icuName
, &errorCode
);
169 if (NULL
!= converter
) {
170 char lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
172 if ((0 == lossyByte
) && (0 != (flags
& kCFStringEncodingAllowLossyConversion
))) lossyByte
= '?';
176 ucnv_setToUCallBack(converter
, &UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
178 ucnv_setFromUCallBack(converter
, &UCNV_FROM_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
181 ucnv_setSubstChars(converter
, &lossyByte
, 1, &errorCode
);
188 #define ICU_CONVERTER_SLOT_INCREMENT (10)
189 #define ICU_CONVERTER_MAX_SLOT (255)
191 static CFIndex
__CFStringEncodingConverterReleaseICUConverter(UConverter
*converter
, uint32_t flags
, CFIndex status
) {
192 uint8_t streamID
= CFStringEncodingStreamIDFromMask(flags
);
194 if ((kCFStringEncodingInvalidInputStream
!= status
) && ((0 != (flags
& kCFStringEncodingPartialInput
)) || ((kCFStringEncodingInsufficientOutputBufferLength
== status
) && (0 != (flags
& kCFStringEncodingPartialOutput
))))) {
196 __CFICUThreadData
*data
= __CFStringEncodingICUGetThreadData();
198 if (NULL
== data
->_converters
) {
199 data
->_converters
= (UConverter
**)CFAllocatorAllocate(NULL
, sizeof(UConverter
*) * ICU_CONVERTER_SLOT_INCREMENT
, 0);
200 memset(data
->_converters
, 0, sizeof(UConverter
*) * ICU_CONVERTER_SLOT_INCREMENT
);
201 data
->_numSlots
= ICU_CONVERTER_SLOT_INCREMENT
;
203 } else if ((data
->_nextSlot
>= data
->_numSlots
) || (NULL
!= data
->_converters
[data
->_nextSlot
])) { // Need to find one
206 for (index
= 0;index
< data
->_numSlots
;index
++) {
207 if (NULL
== data
->_converters
[index
]) {
208 data
->_nextSlot
= index
;
213 if (index
>= data
->_numSlots
) { // we're full
214 UConverter
**newConverters
;
215 CFIndex newSize
= data
->_numSlots
+ ICU_CONVERTER_SLOT_INCREMENT
;
217 if (newSize
> ICU_CONVERTER_MAX_SLOT
) { // something is terribly wrong
218 CFLog(kCFLogLevelError
, CFSTR("Per-thread streaming ID for ICU converters exhausted. Ignoring..."));
219 ucnv_close(converter
);
223 newConverters
= (UConverter
**)CFAllocatorAllocate(NULL
, sizeof(UConverter
*) * newSize
, 0);
224 memset(newConverters
, 0, sizeof(UConverter
*) * newSize
);
225 memcpy(newConverters
, data
->_converters
, sizeof(UConverter
*) * data
->_numSlots
);
226 CFAllocatorDeallocate(NULL
, data
->_converters
);
227 data
->_converters
= newConverters
;
228 data
->_nextSlot
= data
->_numSlots
;
229 data
->_numSlots
= newSize
;
233 data
->_converters
[data
->_nextSlot
] = converter
;
234 streamID
= data
->_nextSlot
+ 1;
236 // now find next slot
239 if ((data
->_nextSlot
>= data
->_numSlots
) || (NULL
!= data
->_converters
[data
->_nextSlot
])) {
242 while ((data
->_nextSlot
< data
->_numSlots
) && (NULL
!= data
->_converters
[data
->_nextSlot
])) ++data
->_nextSlot
;
246 return CFStringEncodingStreamIDToMask(streamID
);
250 __CFICUThreadData
*data
= __CFStringEncodingICUGetThreadData();
252 --streamID
; // map to array index
254 if ((streamID
< data
->_numSlots
) && (converter
== data
->_converters
[streamID
])) {
255 data
->_converters
[streamID
] = NULL
;
256 if (data
->_nextSlot
> streamID
) data
->_nextSlot
= streamID
;
260 ucnv_close(converter
);
/* Size, in elements, of the stack scratch buffer used when a caller only asks for the converted length. */
#define MAX_BUFFER_SIZE (1000)

/* The workarounds below were written against ICU 4.0 or earlier; warn when building with anything newer. */
#if (U_ICU_VERSION_MAJOR_NUM > 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM > 0))
#warning Unknown ICU version. Check binary compatibility issues for rdar://problem/6024743
#endif
#define HAS_ICU_BUG_6024743 (1)
#define HAS_ICU_BUG_6025527 (1)
273 __private_extern__ CFIndex
__CFStringEncodingICUToBytes(const char *icuName
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
274 UConverter
*converter
;
275 UErrorCode errorCode
= U_ZERO_ERROR
;
276 const UTF16Char
*source
= characters
;
277 const UTF16Char
*sourceLimit
= source
+ numChars
;
278 char *destination
= (char *)bytes
;
279 const char *destinationLimit
= destination
+ maxByteLen
;
280 bool flush
= ((0 == (flags
& kCFStringEncodingPartialInput
)) ? true : false);
283 if (NULL
== (converter
= __CFStringEncodingConverterCreateICUConverter(icuName
, flags
, false))) return kCFStringEncodingConverterUnavailable
;
285 if (0 == maxByteLen
) {
286 char buffer
[MAX_BUFFER_SIZE
];
287 CFIndex totalLength
= 0;
289 while ((source
< sourceLimit
) && (U_ZERO_ERROR
== errorCode
)) {
290 destination
= buffer
;
291 destinationLimit
= destination
+ MAX_BUFFER_SIZE
;
293 ucnv_fromUnicode(converter
, &destination
, destinationLimit
, (const UChar
**)&source
, (const UChar
*)sourceLimit
, NULL
, flush
, &errorCode
);
295 totalLength
+= (destination
- buffer
);
297 if (U_BUFFER_OVERFLOW_ERROR
== errorCode
) errorCode
= U_ZERO_ERROR
;
300 if (NULL
!= usedByteLen
) *usedByteLen
= totalLength
;
302 ucnv_fromUnicode(converter
, &destination
, destinationLimit
, (const UChar
**)&source
, (const UChar
*)sourceLimit
, NULL
, flush
, &errorCode
);
304 if (NULL
!= usedByteLen
) *usedByteLen
= destination
- (const char *)bytes
;
307 status
= ((U_ZERO_ERROR
== errorCode
) ? kCFStringEncodingConversionSuccess
: ((U_BUFFER_OVERFLOW_ERROR
== errorCode
) ? kCFStringEncodingInsufficientOutputBufferLength
: kCFStringEncodingInvalidInputStream
));
309 if (NULL
!= usedCharLen
) {
310 #if HAS_ICU_BUG_6024743
311 /* ICU has a serious behavioral inconsistency issue that the source pointer returned from ucnv_fromUnicode() is after illegal input. We have to keep track of any changes in this area in order to prevent future binary compatiibility issues */
312 if (kCFStringEncodingInvalidInputStream
== status
) {
313 #define MAX_ERROR_BUFFER_LEN (32)
314 UTF16Char errorBuffer
[MAX_ERROR_BUFFER_LEN
];
315 int8_t errorLength
= MAX_ERROR_BUFFER_LEN
;
316 #undef MAX_ERROR_BUFFER_LEN
318 errorCode
= U_ZERO_ERROR
;
320 ucnv_getInvalidUChars(converter
, (UChar
*)errorBuffer
, &errorLength
, &errorCode
);
322 if (U_ZERO_ERROR
== errorCode
) {
323 source
-= errorLength
;
325 // Gah, something is terribly wrong. Reset everything
326 source
= characters
; // 0 length
327 if (NULL
!= usedByteLen
) *usedByteLen
= 0;
331 *usedCharLen
= source
- characters
;
334 status
|= __CFStringEncodingConverterReleaseICUConverter(converter
, flags
, status
);
339 __private_extern__ CFIndex
__CFStringEncodingICUToUnicode(const char *icuName
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
340 UConverter
*converter
;
341 UErrorCode errorCode
= U_ZERO_ERROR
;
342 const char *source
= (const char *)bytes
;
343 const char *sourceLimit
= source
+ numBytes
;
344 UTF16Char
*destination
= characters
;
345 const UTF16Char
*destinationLimit
= destination
+ maxCharLen
;
346 bool flush
= ((0 == (flags
& kCFStringEncodingPartialInput
)) ? true : false);
349 if (NULL
== (converter
= __CFStringEncodingConverterCreateICUConverter(icuName
, flags
, true))) return kCFStringEncodingConverterUnavailable
;
351 if (0 == maxCharLen
) {
352 UTF16Char buffer
[MAX_BUFFER_SIZE
];
353 CFIndex totalLength
= 0;
355 while ((source
< sourceLimit
) && (U_ZERO_ERROR
== errorCode
)) {
356 destination
= buffer
;
357 destinationLimit
= destination
+ MAX_BUFFER_SIZE
;
359 ucnv_toUnicode(converter
, (UChar
**)&destination
, (const UChar
*)destinationLimit
, &source
, sourceLimit
, NULL
, flush
, &errorCode
);
361 totalLength
+= (destination
- buffer
);
363 if (U_BUFFER_OVERFLOW_ERROR
== errorCode
) errorCode
= U_ZERO_ERROR
;
366 if (NULL
!= usedCharLen
) *usedCharLen
= totalLength
;
368 ucnv_toUnicode(converter
, (UChar
**)&destination
, (const UChar
*)destinationLimit
, &source
, sourceLimit
, NULL
, flush
, &errorCode
);
370 if (NULL
!= usedCharLen
) *usedCharLen
= destination
- characters
;
373 status
= ((U_ZERO_ERROR
== errorCode
) ? kCFStringEncodingConversionSuccess
: ((U_BUFFER_OVERFLOW_ERROR
== errorCode
) ? kCFStringEncodingInsufficientOutputBufferLength
: kCFStringEncodingInvalidInputStream
));
375 if (NULL
!= usedByteLen
) {
376 #if HAS_ICU_BUG_6024743
377 /* ICU has a serious behavioral inconsistency issue that the source pointer returned from ucnv_toUnicode() is after illegal input. We have to keep track of any changes in this area in order to prevent future binary compatiibility issues */
378 if (kCFStringEncodingInvalidInputStream
== status
) {
379 #define MAX_ERROR_BUFFER_LEN (32)
380 char errorBuffer
[MAX_ERROR_BUFFER_LEN
];
381 int8_t errorLength
= MAX_ERROR_BUFFER_LEN
;
382 #undef MAX_ERROR_BUFFER_LEN
384 errorCode
= U_ZERO_ERROR
;
386 ucnv_getInvalidChars(converter
, errorBuffer
, &errorLength
, &errorCode
);
388 if (U_ZERO_ERROR
== errorCode
) {
389 #if HAS_ICU_BUG_6025527
390 // Another ICU oddness here. ucnv_getInvalidUChars() writes the '\0' terminator, and errorLength includes the extra byte.
391 if ((errorLength
> 0) && ('\0' == errorBuffer
[errorLength
- 1])) --errorLength
;
393 source
-= errorLength
;
395 // Gah, something is terribly wrong. Reset everything
396 source
= (const char *)bytes
; // 0 length
397 if (NULL
!= usedCharLen
) *usedCharLen
= 0;
402 *usedByteLen
= source
- (const char *)bytes
;
405 status
|= __CFStringEncodingConverterReleaseICUConverter(converter
, flags
, status
);
410 __private_extern__ CFIndex
__CFStringEncodingICUCharLength(const char *icuName
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
412 return (__CFStringEncodingICUToUnicode(icuName
, flags
, bytes
, numBytes
, NULL
, NULL
, 0, &usedCharLen
) == kCFStringEncodingConversionSuccess
? usedCharLen
: 0);
415 __private_extern__ CFIndex
__CFStringEncodingICUByteLength(const char *icuName
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
417 return (__CFStringEncodingICUToBytes(icuName
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
) == kCFStringEncodingConversionSuccess
? usedByteLen
: 0);
420 __private_extern__ CFStringEncoding
*__CFStringEncodingCreateICUEncodings(CFAllocatorRef allocator
, CFIndex
*numberOfIndex
) {
421 CFIndex count
= ucnv_countAvailable();
422 CFIndex numEncodings
= 0;
423 CFStringEncoding
*encodings
;
424 CFStringEncoding encoding
;
427 if (0 == count
) return NULL
;
429 encodings
= (CFStringEncoding
*)CFAllocatorAllocate(NULL
, sizeof(CFStringEncoding
) * count
, 0);
431 for (index
= 0;index
< count
;index
++) {
432 encoding
= __CFStringEncodingGetFromICUName(ucnv_getAvailableName(index
));
434 if (kCFStringEncodingInvalidId
!= encoding
) encodings
[numEncodings
++] = encoding
;
437 if (0 == numEncodings
) {
438 CFAllocatorDeallocate(allocator
, encodings
);
442 *numberOfIndex
= numEncodings
;