/*
 * Copyright (c) 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Created by Aki Inoue on 07/12/04.
 * Copyright 2007-2009, Apple Inc. All rights reserved.
 */
33 #include "CFStringEncodingDatabase.h"
34 #include "CFStringEncodingConverterPriv.h"
35 #include "CFICUConverters.h"
36 #include <CoreFoundation/CFStringEncodingExt.h>
37 #include <unicode/ucnv.h>
38 #include <unicode/uversion.h>
39 #include "CFInternal.h"
/* Windows has no strncasecmp_l()/snprintf(); map them onto the CRT
   equivalents. The locale argument of strncasecmp_l() is dropped. */
#if DEPLOYMENT_TARGET_WINDOWS
#define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c)
#define snprintf _snprintf
#endif
47 // Thread data support
51 UConverter
**_converters
;
54 static void __CFICUThreadDataDestructor(void *context
) {
55 __CFICUThreadData
* data
= (__CFICUThreadData
*)context
;
57 if (NULL
!= data
->_converters
) { // scan to make sure deallocation
58 UConverter
**converter
= data
->_converters
;
59 UConverter
**limit
= converter
+ data
->_numSlots
;
61 while (converter
< limit
) {
62 if (NULL
!= converter
) ucnv_close(*converter
);
65 CFAllocatorDeallocate(NULL
, data
->_converters
);
68 CFAllocatorDeallocate(NULL
, data
);
71 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
74 CF_INLINE __CFICUThreadData
*__CFStringEncodingICUGetThreadData() {
75 __CFICUThreadData
* data
;
77 pthread_key_init_np(__CFTSDKeyICUConverter
, __CFICUThreadDataDestructor
);
78 data
= (__CFICUThreadData
*)pthread_getspecific(__CFTSDKeyICUConverter
);
81 data
= (__CFICUThreadData
*)CFAllocatorAllocate(NULL
, sizeof(__CFICUThreadData
), 0);
82 memset(data
, 0, sizeof(__CFICUThreadData
));
83 pthread_setspecific(__CFTSDKeyICUConverter
, (const void *)data
);
88 #elif DEPLOYMENT_TARGET_WINDOWS
89 __private_extern__
void __CFStringEncodingICUThreadDataCleaner(void *context
) { __CFICUThreadDataDestructor(context
); }
91 CF_INLINE __CFICUThreadData
*__CFStringEncodingICUGetThreadData() {
92 __CFThreadSpecificData
*threadData
= __CFGetThreadSpecificData_inline();
94 if (NULL
== threadData
->_icuThreadData
) {
95 threadData
->_icuThreadData
= (__CFICUThreadData
*)CFAllocatorAllocate(NULL
, sizeof(__CFICUThreadData
), 0);
96 memset(threadData
->_icuThreadData
, 0, sizeof(__CFICUThreadData
));
99 return (__CFICUThreadData
*)threadData
->_icuThreadData
;
102 #error Need implementation for thread data
105 __private_extern__
const char *__CFStringEncodingGetICUName(CFStringEncoding encoding
) {
106 #define STACK_BUFFER_SIZE (60)
107 char buffer
[STACK_BUFFER_SIZE
];
108 const char *result
= NULL
;
109 UErrorCode errorCode
= U_ZERO_ERROR
;
110 uint32_t codepage
= 0;
112 if (kCFStringEncodingUTF7_IMAP
== encoding
) return "IMAP-mailbox-name";
114 if (kCFStringEncodingUnicode
!= (encoding
& 0x0F00)) codepage
= __CFStringEncodingGetWindowsCodePage(encoding
); // we don't use codepage for UTF to avoid little endian weirdness of Windows
116 if ((0 != codepage
) && (snprintf(buffer
, STACK_BUFFER_SIZE
, "windows-%d", codepage
) < STACK_BUFFER_SIZE
) && (NULL
!= (result
= ucnv_getAlias(buffer
, 0, &errorCode
)))) return result
;
118 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) result
= ucnv_getAlias(buffer
, 0, &errorCode
);
121 #undef STACK_BUFFER_SIZE
124 __private_extern__ CFStringEncoding
__CFStringEncodingGetFromICUName(const char *icuName
) {
126 UErrorCode errorCode
= U_ZERO_ERROR
;
128 if ((0 == strncasecmp_l(icuName
, "windows-", strlen("windows-"), NULL
)) && (0 != (codepage
= strtol(icuName
+ strlen("windows-"), NULL
, 10)))) return __CFStringEncodingGetFromWindowsCodePage(codepage
);
130 if (0 != ucnv_countAliases(icuName
, &errorCode
)) {
131 CFStringEncoding encoding
;
134 // Try WINDOWS platform
135 name
= ucnv_getStandardName(icuName
, "WINDOWS", &errorCode
);
138 if ((0 == strncasecmp_l(name
, "windows-", strlen("windows-"), NULL
)) && (0 != (codepage
= strtol(name
+ strlen("windows-"), NULL
, 10)))) return __CFStringEncodingGetFromWindowsCodePage(codepage
);
140 if (strncasecmp_l(icuName
, name
, strlen(name
), NULL
) && (kCFStringEncodingInvalidId
!= (encoding
= __CFStringEncodingGetFromCanonicalName(name
)))) return encoding
;
144 name
= ucnv_getStandardName(icuName
, "JAVA", &errorCode
);
145 if ((NULL
!= name
) && strncasecmp_l(icuName
, name
, strlen(name
), NULL
) && (kCFStringEncodingInvalidId
!= (encoding
= __CFStringEncodingGetFromCanonicalName(name
)))) return encoding
;
148 name
= ucnv_getStandardName(icuName
, "MIME", &errorCode
);
149 if ((NULL
!= name
) && strncasecmp_l(icuName
, name
, strlen(name
), NULL
) && (kCFStringEncodingInvalidId
!= (encoding
= __CFStringEncodingGetFromCanonicalName(name
)))) return encoding
;
152 return kCFStringEncodingInvalidId
;
155 CF_INLINE UConverter
*__CFStringEncodingConverterCreateICUConverter(const char *icuName
, uint32_t flags
, bool toUnicode
) {
156 UConverter
*converter
;
157 UErrorCode errorCode
= U_ZERO_ERROR
;
158 uint8_t streamID
= CFStringEncodingStreamIDFromMask(flags
);
160 if (0 != streamID
) { // this is a part of streaming previously created
161 __CFICUThreadData
*data
= __CFStringEncodingICUGetThreadData();
163 --streamID
; // map to array index
165 if ((streamID
< data
->_numSlots
) && (NULL
!= data
->_converters
[streamID
])) return data
->_converters
[streamID
];
168 converter
= ucnv_open(icuName
, &errorCode
);
170 if (NULL
!= converter
) {
171 char lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
173 if ((0 == lossyByte
) && (0 != (flags
& kCFStringEncodingAllowLossyConversion
))) lossyByte
= '?';
177 ucnv_setToUCallBack(converter
, &UCNV_TO_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
179 ucnv_setFromUCallBack(converter
, &UCNV_FROM_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, &errorCode
);
182 ucnv_setSubstChars(converter
, &lossyByte
, 1, &errorCode
);
189 #define ICU_CONVERTER_SLOT_INCREMENT (10)
190 #define ICU_CONVERTER_MAX_SLOT (255)
192 static CFIndex
__CFStringEncodingConverterReleaseICUConverter(UConverter
*converter
, uint32_t flags
, CFIndex status
) {
193 uint8_t streamID
= CFStringEncodingStreamIDFromMask(flags
);
195 if ((kCFStringEncodingInvalidInputStream
!= status
) && ((0 != (flags
& kCFStringEncodingPartialInput
)) || ((kCFStringEncodingInsufficientOutputBufferLength
== status
) && (0 != (flags
& kCFStringEncodingPartialOutput
))))) {
197 __CFICUThreadData
*data
= __CFStringEncodingICUGetThreadData();
199 if (NULL
== data
->_converters
) {
200 data
->_converters
= (UConverter
**)CFAllocatorAllocate(NULL
, sizeof(UConverter
*) * ICU_CONVERTER_SLOT_INCREMENT
, 0);
201 memset(data
->_converters
, 0, sizeof(UConverter
*) * ICU_CONVERTER_SLOT_INCREMENT
);
202 data
->_numSlots
= ICU_CONVERTER_SLOT_INCREMENT
;
204 } else if ((data
->_nextSlot
>= data
->_numSlots
) || (NULL
!= data
->_converters
[data
->_nextSlot
])) { // Need to find one
207 for (index
= 0;index
< data
->_numSlots
;index
++) {
208 if (NULL
== data
->_converters
[index
]) {
209 data
->_nextSlot
= index
;
214 if (index
>= data
->_numSlots
) { // we're full
215 UConverter
**newConverters
;
216 CFIndex newSize
= data
->_numSlots
+ ICU_CONVERTER_SLOT_INCREMENT
;
218 if (newSize
> ICU_CONVERTER_MAX_SLOT
) { // something is terribly wrong
219 CFLog(kCFLogLevelError
, CFSTR("Per-thread streaming ID for ICU converters exhausted. Ignoring..."));
220 ucnv_close(converter
);
224 newConverters
= (UConverter
**)CFAllocatorAllocate(NULL
, sizeof(UConverter
*) * newSize
, 0);
225 memset(newConverters
, 0, sizeof(UConverter
*) * newSize
);
226 memcpy(newConverters
, data
->_converters
, sizeof(UConverter
*) * data
->_numSlots
);
227 CFAllocatorDeallocate(NULL
, data
->_converters
);
228 data
->_converters
= newConverters
;
229 data
->_nextSlot
= data
->_numSlots
;
230 data
->_numSlots
= newSize
;
234 data
->_converters
[data
->_nextSlot
] = converter
;
235 streamID
= data
->_nextSlot
+ 1;
237 // now find next slot
240 if ((data
->_nextSlot
>= data
->_numSlots
) || (NULL
!= data
->_converters
[data
->_nextSlot
])) {
243 while ((data
->_nextSlot
< data
->_numSlots
) && (NULL
!= data
->_converters
[data
->_nextSlot
])) ++data
->_nextSlot
;
247 return CFStringEncodingStreamIDToMask(streamID
);
251 __CFICUThreadData
*data
= __CFStringEncodingICUGetThreadData();
253 --streamID
; // map to array index
255 if ((streamID
< data
->_numSlots
) && (converter
== data
->_converters
[streamID
])) {
256 data
->_converters
[streamID
] = NULL
;
257 if (data
->_nextSlot
> streamID
) data
->_nextSlot
= streamID
;
261 ucnv_close(converter
);
/* Size, in elements, of the scratch buffer used when only measuring output. */
#define MAX_BUFFER_SIZE (1000)

/* The workarounds below were written against ICU 4.0; warn on anything newer. */
#if (U_ICU_VERSION_MAJOR_NUM > 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM > 0))
#warning Unknown ICU version. Check binary compatibility issues for rdar://problem/6024743
#endif
#define HAS_ICU_BUG_6024743 (1)
#define HAS_ICU_BUG_6025527 (1)
274 __private_extern__ CFIndex
__CFStringEncodingICUToBytes(const char *icuName
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
275 UConverter
*converter
;
276 UErrorCode errorCode
= U_ZERO_ERROR
;
277 const UTF16Char
*source
= characters
;
278 const UTF16Char
*sourceLimit
= source
+ numChars
;
279 char *destination
= (char *)bytes
;
280 const char *destinationLimit
= destination
+ maxByteLen
;
281 bool flush
= ((0 == (flags
& kCFStringEncodingPartialInput
)) ? true : false);
284 if (NULL
== (converter
= __CFStringEncodingConverterCreateICUConverter(icuName
, flags
, false))) return kCFStringEncodingConverterUnavailable
;
286 if (0 == maxByteLen
) {
287 char buffer
[MAX_BUFFER_SIZE
];
288 CFIndex totalLength
= 0;
290 while ((source
< sourceLimit
) && (U_ZERO_ERROR
== errorCode
)) {
291 destination
= buffer
;
292 destinationLimit
= destination
+ MAX_BUFFER_SIZE
;
294 ucnv_fromUnicode(converter
, &destination
, destinationLimit
, (const UChar
**)&source
, (const UChar
*)sourceLimit
, NULL
, flush
, &errorCode
);
296 totalLength
+= (destination
- buffer
);
298 if (U_BUFFER_OVERFLOW_ERROR
== errorCode
) errorCode
= U_ZERO_ERROR
;
301 if (NULL
!= usedByteLen
) *usedByteLen
= totalLength
;
303 ucnv_fromUnicode(converter
, &destination
, destinationLimit
, (const UChar
**)&source
, (const UChar
*)sourceLimit
, NULL
, flush
, &errorCode
);
305 if (NULL
!= usedByteLen
) *usedByteLen
= destination
- (const char *)bytes
;
308 status
= ((U_ZERO_ERROR
== errorCode
) ? kCFStringEncodingConversionSuccess
: ((U_BUFFER_OVERFLOW_ERROR
== errorCode
) ? kCFStringEncodingInsufficientOutputBufferLength
: kCFStringEncodingInvalidInputStream
));
310 if (NULL
!= usedCharLen
) {
311 #if HAS_ICU_BUG_6024743
312 /* ICU has a serious behavioral inconsistency issue that the source pointer returned from ucnv_fromUnicode() is after illegal input. We have to keep track of any changes in this area in order to prevent future binary compatiibility issues */
313 if (kCFStringEncodingInvalidInputStream
== status
) {
314 #define MAX_ERROR_BUFFER_LEN (32)
315 UTF16Char errorBuffer
[MAX_ERROR_BUFFER_LEN
];
316 int8_t errorLength
= MAX_ERROR_BUFFER_LEN
;
317 #undef MAX_ERROR_BUFFER_LEN
319 errorCode
= U_ZERO_ERROR
;
321 ucnv_getInvalidUChars(converter
, (UChar
*)errorBuffer
, &errorLength
, &errorCode
);
323 if (U_ZERO_ERROR
== errorCode
) {
324 source
-= errorLength
;
326 // Gah, something is terribly wrong. Reset everything
327 source
= characters
; // 0 length
328 if (NULL
!= usedByteLen
) *usedByteLen
= 0;
332 *usedCharLen
= source
- characters
;
335 status
|= __CFStringEncodingConverterReleaseICUConverter(converter
, flags
, status
);
340 __private_extern__ CFIndex
__CFStringEncodingICUToUnicode(const char *icuName
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
341 UConverter
*converter
;
342 UErrorCode errorCode
= U_ZERO_ERROR
;
343 const char *source
= (const char *)bytes
;
344 const char *sourceLimit
= source
+ numBytes
;
345 UTF16Char
*destination
= characters
;
346 const UTF16Char
*destinationLimit
= destination
+ maxCharLen
;
347 bool flush
= ((0 == (flags
& kCFStringEncodingPartialInput
)) ? true : false);
350 if (NULL
== (converter
= __CFStringEncodingConverterCreateICUConverter(icuName
, flags
, true))) return kCFStringEncodingConverterUnavailable
;
352 if (0 == maxCharLen
) {
353 UTF16Char buffer
[MAX_BUFFER_SIZE
];
354 CFIndex totalLength
= 0;
356 while ((source
< sourceLimit
) && (U_ZERO_ERROR
== errorCode
)) {
357 destination
= buffer
;
358 destinationLimit
= destination
+ MAX_BUFFER_SIZE
;
360 ucnv_toUnicode(converter
, (UChar
**)&destination
, (const UChar
*)destinationLimit
, &source
, sourceLimit
, NULL
, flush
, &errorCode
);
362 totalLength
+= (destination
- buffer
);
364 if (U_BUFFER_OVERFLOW_ERROR
== errorCode
) errorCode
= U_ZERO_ERROR
;
367 if (NULL
!= usedCharLen
) *usedCharLen
= totalLength
;
369 ucnv_toUnicode(converter
, (UChar
**)&destination
, (const UChar
*)destinationLimit
, &source
, sourceLimit
, NULL
, flush
, &errorCode
);
371 if (NULL
!= usedCharLen
) *usedCharLen
= destination
- characters
;
374 status
= ((U_ZERO_ERROR
== errorCode
) ? kCFStringEncodingConversionSuccess
: ((U_BUFFER_OVERFLOW_ERROR
== errorCode
) ? kCFStringEncodingInsufficientOutputBufferLength
: kCFStringEncodingInvalidInputStream
));
376 if (NULL
!= usedByteLen
) {
377 #if HAS_ICU_BUG_6024743
378 /* ICU has a serious behavioral inconsistency issue that the source pointer returned from ucnv_toUnicode() is after illegal input. We have to keep track of any changes in this area in order to prevent future binary compatiibility issues */
379 if (kCFStringEncodingInvalidInputStream
== status
) {
380 #define MAX_ERROR_BUFFER_LEN (32)
381 char errorBuffer
[MAX_ERROR_BUFFER_LEN
];
382 int8_t errorLength
= MAX_ERROR_BUFFER_LEN
;
383 #undef MAX_ERROR_BUFFER_LEN
385 errorCode
= U_ZERO_ERROR
;
387 ucnv_getInvalidChars(converter
, errorBuffer
, &errorLength
, &errorCode
);
389 if (U_ZERO_ERROR
== errorCode
) {
390 #if HAS_ICU_BUG_6025527
391 // Another ICU oddness here. ucnv_getInvalidUChars() writes the '\0' terminator, and errorLength includes the extra byte.
392 if ((errorLength
> 0) && ('\0' == errorBuffer
[errorLength
- 1])) --errorLength
;
394 source
-= errorLength
;
396 // Gah, something is terribly wrong. Reset everything
397 source
= (const char *)bytes
; // 0 length
398 if (NULL
!= usedCharLen
) *usedCharLen
= 0;
403 *usedByteLen
= source
- (const char *)bytes
;
406 status
|= __CFStringEncodingConverterReleaseICUConverter(converter
, flags
, status
);
411 __private_extern__ CFIndex
__CFStringEncodingICUCharLength(const char *icuName
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
413 return (__CFStringEncodingICUToUnicode(icuName
, flags
, bytes
, numBytes
, NULL
, NULL
, 0, &usedCharLen
) == kCFStringEncodingConversionSuccess
? usedCharLen
: 0);
416 __private_extern__ CFIndex
__CFStringEncodingICUByteLength(const char *icuName
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
418 return (__CFStringEncodingICUToBytes(icuName
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
) == kCFStringEncodingConversionSuccess
? usedByteLen
: 0);
421 __private_extern__ CFStringEncoding
*__CFStringEncodingCreateICUEncodings(CFAllocatorRef allocator
, CFIndex
*numberOfIndex
) {
422 CFIndex count
= ucnv_countAvailable();
423 CFIndex numEncodings
= 0;
424 CFStringEncoding
*encodings
;
425 CFStringEncoding encoding
;
428 if (0 == count
) return NULL
;
430 encodings
= (CFStringEncoding
*)CFAllocatorAllocate(NULL
, sizeof(CFStringEncoding
) * count
, 0);
432 for (index
= 0;index
< count
;index
++) {
433 encoding
= __CFStringEncodingGetFromICUName(ucnv_getAvailableName(index
));
435 if (kCFStringEncodingInvalidId
!= encoding
) encodings
[numEncodings
++] = encoding
;
438 if (0 == numEncodings
) {
439 CFAllocatorDeallocate(allocator
, encodings
);
443 *numberOfIndex
= numEncodings
;