2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFPlatformConverters.c
25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFString.h>
31 #include "CFStringEncodingConverterExt.h"
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include <CoreFoundation/CFPreferences.h>
34 #include "CFUniChar.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFStringEncodingConverterPriv.h"
37 #include "CFICUConverters.h"
40 CF_INLINE
bool __CFIsPlatformConverterAvailable(int encoding
) {
42 #if DEPLOYMENT_TARGET_WINDOWS
43 return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding
)) ? true : false);
49 static const CFStringEncodingConverter __CFICUBootstrap
= {
50 NULL
/* toBytes */, NULL
/* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
51 kCFStringEncodingConverterICU
/* encodingClass */,
52 NULL
/* toBytesLen */, NULL
/* toUnicodeLen */, NULL
/* toBytesFallback */,
53 NULL
/* toUnicodeFallback */, NULL
/* toBytesPrecompose */, NULL
, /* isValidCombiningChar */
56 static const CFStringEncodingConverter __CFPlatformBootstrap
= {
57 NULL
/* toBytes */, NULL
/* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
58 kCFStringEncodingConverterPlatformSpecific
/* encodingClass */,
59 NULL
/* toBytesLen */, NULL
/* toUnicodeLen */, NULL
/* toBytesFallback */,
60 NULL
/* toUnicodeFallback */, NULL
/* toBytesPrecompose */, NULL
, /* isValidCombiningChar */
63 __private_extern__
const CFStringEncodingConverter
*__CFStringEncodingGetExternalConverter(uint32_t encoding
) {
65 return (__CFIsPlatformConverterAvailable(encoding
) ? &__CFPlatformBootstrap
: (__CFStringEncodingGetICUName(encoding
) ? &__CFICUBootstrap
: NULL
)); // we prefer Text Encoding Converter ICU since it's more reliable
68 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
69 __private_extern__ CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) {
73 #elif DEPLOYMENT_TARGET_WINDOWS
77 static uint32_t __CFWin32EncodingIndex
= 0;
78 static CFStringEncoding
*__CFWin32EncodingList
= NULL
;
80 static char CALLBACK
__CFWin32EnumCodePageProc(LPTSTR string
) {
81 uint32_t encoding
= CFStringConvertWindowsCodepageToEncoding(_tcstoul(string
, NULL
, 10));
84 if (encoding
!= kCFStringEncodingInvalidId
) { // We list only encodings we know
85 if (__CFWin32EncodingList
) {
86 for (idx
= 0;idx
< (CFIndex
)__CFWin32EncodingIndex
;idx
++) if (__CFWin32EncodingList
[idx
] == encoding
) break;
87 if (idx
!= __CFWin32EncodingIndex
) return true;
88 __CFWin32EncodingList
[__CFWin32EncodingIndex
] = encoding
;
90 ++__CFWin32EncodingIndex
;
95 __private_extern__ CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) {
96 CFStringEncoding
*encodings
;
98 EnumSystemCodePages((CODEPAGE_ENUMPROC
)&__CFWin32EnumCodePageProc
, CP_INSTALLED
);
99 __CFWin32EncodingList
= (uint32_t *)CFAllocatorAllocate(allocator
, sizeof(uint32_t) * __CFWin32EncodingIndex
, 0);
100 EnumSystemCodePages((CODEPAGE_ENUMPROC
)&__CFWin32EnumCodePageProc
, CP_INSTALLED
);
102 *numberOfConverters
= __CFWin32EncodingIndex
;
103 encodings
= __CFWin32EncodingList
;
105 __CFWin32EncodingIndex
= 0;
106 __CFWin32EncodingList
= NULL
;
111 __private_extern__ CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) { return NULL
; }
114 __private_extern__ CFIndex
__CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
116 #if DEPLOYMENT_TARGET_WINDOWS
120 if ((kCFStringEncodingUTF7
!= encoding
) && (kCFStringEncodingGB_18030_2000
!= encoding
) && (0x0800 != (encoding
& 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
121 dwFlags
|= (flags
& (kCFStringEncodingAllowLossyConversion
|kCFStringEncodingSubstituteCombinings
) ? WC_DEFAULTCHAR
: 0);
122 dwFlags
|= (flags
& kCFStringEncodingComposeCombinings
? WC_COMPOSITECHECK
: 0);
123 dwFlags
|= (flags
& kCFStringEncodingIgnoreCombinings
? WC_DISCARDNS
: 0);
126 if ((usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
)) == 0) {
127 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER
) {
130 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding
), &cpInfo
)) {
131 cpInfo
.MaxCharSize
= 1; // Is this right ???
133 if (cpInfo
.MaxCharSize
== 1) {
134 numChars
= maxByteLen
;
136 usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, NULL
, 0, NULL
, NULL
);
137 usedLen
-= maxByteLen
;
138 numChars
= (numChars
> usedLen
? numChars
- usedLen
: 1);
140 if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
) == 0) {
141 if (usedCharLen
) *usedCharLen
= 0;
142 if (usedByteLen
) *usedByteLen
= 0;
144 CFIndex lastUsedLen
= 0;
146 while ((usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, ++numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
))) lastUsedLen
= usedLen
;
147 if (usedCharLen
) *usedCharLen
= (numChars
- 1);
148 if (usedByteLen
) *usedByteLen
= lastUsedLen
;
151 return kCFStringEncodingInsufficientOutputBufferLength
;
153 return kCFStringEncodingInvalidInputStream
;
156 if (usedCharLen
) *usedCharLen
= numChars
;
157 if (usedByteLen
) *usedByteLen
= usedLen
;
158 return kCFStringEncodingConversionSuccess
;
160 #endif /* DEPLOYMENT_TARGET_WINDOWS */
162 return kCFStringEncodingConverterUnavailable
;
165 __private_extern__ CFIndex
__CFStringEncodingPlatformBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
167 #if DEPLOYMENT_TARGET_WINDOWS
171 if ((kCFStringEncodingUTF7
!= encoding
) && (kCFStringEncodingGB_18030_2000
!= encoding
) && (0x0800 != (encoding
& 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
172 dwFlags
|= (flags
& (kCFStringEncodingAllowLossyConversion
|kCFStringEncodingSubstituteCombinings
) ? 0 : MB_ERR_INVALID_CHARS
);
173 dwFlags
|= (flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
) ? MB_COMPOSITE
: MB_PRECOMPOSED
);
176 if ((usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
) == 0)) {
177 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER
) {
180 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding
), &cpInfo
)) {
181 cpInfo
.MaxCharSize
= 1; // Is this right ???
183 if (cpInfo
.MaxCharSize
== 1) {
184 numBytes
= maxCharLen
;
186 usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
);
187 usedLen
-= maxCharLen
;
188 numBytes
= (numBytes
> usedLen
? numBytes
- usedLen
: 1);
190 while ((usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
)) == 0) {
191 if ((--numBytes
) == 0) break;
193 if (usedCharLen
) *usedCharLen
= usedLen
;
194 if (usedByteLen
) *usedByteLen
= numBytes
;
196 return kCFStringEncodingInsufficientOutputBufferLength
;
198 return kCFStringEncodingInvalidInputStream
;
201 if (usedCharLen
) *usedCharLen
= usedLen
;
202 if (usedByteLen
) *usedByteLen
= numBytes
;
203 return kCFStringEncodingConversionSuccess
;
205 #endif /* DEPLOYMENT_TARGET_WINDOWS */
207 return kCFStringEncodingConverterUnavailable
;
210 __private_extern__ CFIndex
__CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
212 return (__CFStringEncodingPlatformBytesToUnicode(encoding
, flags
, bytes
, numBytes
, NULL
, NULL
, 0, &usedCharLen
) == kCFStringEncodingConversionSuccess
? usedCharLen
: 0);
215 __private_extern__ CFIndex
__CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
217 return (__CFStringEncodingPlatformUnicodeToBytes(encoding
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
) == kCFStringEncodingConversionSuccess
? usedByteLen
: 0);
220 #undef __CFCarbonCore_GetTextEncodingBase0