/*
 * Copyright (c) 2013 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	CFPlatformConverters.c
	Copyright (c) 1998-2013, Apple Inc. All rights reserved.
	Responsibility: Aki Inoue
*/
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFString.h>
31 #include "CFStringEncodingConverterExt.h"
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFUniChar.h"
34 #include "CFUnicodeDecomposition.h"
35 #include "CFStringEncodingConverterPriv.h"
36 #include "CFICUConverters.h"
39 CF_INLINE
bool __CFIsPlatformConverterAvailable(int encoding
) {
41 #if DEPLOYMENT_TARGET_WINDOWS
42 return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding
)) ? true : false);
48 static const CFStringEncodingConverter __CFICUBootstrap
= {
49 NULL
/* toBytes */, NULL
/* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
50 kCFStringEncodingConverterICU
/* encodingClass */,
51 NULL
/* toBytesLen */, NULL
/* toUnicodeLen */, NULL
/* toBytesFallback */,
52 NULL
/* toUnicodeFallback */, NULL
/* toBytesPrecompose */, NULL
, /* isValidCombiningChar */
55 static const CFStringEncodingConverter __CFPlatformBootstrap
= {
56 NULL
/* toBytes */, NULL
/* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
57 kCFStringEncodingConverterPlatformSpecific
/* encodingClass */,
58 NULL
/* toBytesLen */, NULL
/* toUnicodeLen */, NULL
/* toBytesFallback */,
59 NULL
/* toUnicodeFallback */, NULL
/* toBytesPrecompose */, NULL
, /* isValidCombiningChar */
62 CF_PRIVATE
const CFStringEncodingConverter
*__CFStringEncodingGetExternalConverter(uint32_t encoding
) {
64 // we prefer Text Encoding Converter ICU since it's more reliable
65 if (__CFIsPlatformConverterAvailable(encoding
)) {
66 return &__CFPlatformBootstrap
;
68 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
69 if (__CFStringEncodingGetICUName(encoding
)) {
70 return &__CFICUBootstrap
;
77 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
78 CF_PRIVATE CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) {
82 #elif DEPLOYMENT_TARGET_WINDOWS
86 static uint32_t __CFWin32EncodingIndex
= 0;
87 static CFStringEncoding
*__CFWin32EncodingList
= NULL
;
89 static char CALLBACK
__CFWin32EnumCodePageProc(LPTSTR string
) {
90 uint32_t encoding
= CFStringConvertWindowsCodepageToEncoding(_tcstoul(string
, NULL
, 10));
93 if (encoding
!= kCFStringEncodingInvalidId
) { // We list only encodings we know
94 if (__CFWin32EncodingList
) {
95 for (idx
= 0;idx
< (CFIndex
)__CFWin32EncodingIndex
;idx
++) if (__CFWin32EncodingList
[idx
] == encoding
) break;
96 if (idx
!= __CFWin32EncodingIndex
) return true;
97 __CFWin32EncodingList
[__CFWin32EncodingIndex
] = encoding
;
99 ++__CFWin32EncodingIndex
;
104 CF_PRIVATE CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) {
105 CFStringEncoding
*encodings
;
107 EnumSystemCodePages((CODEPAGE_ENUMPROC
)&__CFWin32EnumCodePageProc
, CP_INSTALLED
);
108 __CFWin32EncodingList
= (uint32_t *)CFAllocatorAllocate(allocator
, sizeof(uint32_t) * __CFWin32EncodingIndex
, 0);
109 EnumSystemCodePages((CODEPAGE_ENUMPROC
)&__CFWin32EnumCodePageProc
, CP_INSTALLED
);
111 *numberOfConverters
= __CFWin32EncodingIndex
;
112 encodings
= __CFWin32EncodingList
;
114 __CFWin32EncodingIndex
= 0;
115 __CFWin32EncodingList
= NULL
;
120 CF_PRIVATE CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) { return NULL
; }
123 CF_PRIVATE CFIndex
__CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
125 #if DEPLOYMENT_TARGET_WINDOWS
129 if ((kCFStringEncodingUTF7
!= encoding
) && (kCFStringEncodingGB_18030_2000
!= encoding
) && (0x0800 != (encoding
& 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
130 dwFlags
|= (flags
& (kCFStringEncodingAllowLossyConversion
|kCFStringEncodingSubstituteCombinings
) ? WC_DEFAULTCHAR
: 0);
131 dwFlags
|= (flags
& kCFStringEncodingComposeCombinings
? WC_COMPOSITECHECK
: 0);
132 dwFlags
|= (flags
& kCFStringEncodingIgnoreCombinings
? WC_DISCARDNS
: 0);
135 if ((usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
)) == 0) {
136 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER
) {
139 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding
), &cpInfo
)) {
140 cpInfo
.MaxCharSize
= 1; // Is this right ???
142 if (cpInfo
.MaxCharSize
== 1) {
143 numChars
= maxByteLen
;
145 usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, NULL
, 0, NULL
, NULL
);
146 usedLen
-= maxByteLen
;
147 numChars
= (numChars
> usedLen
? numChars
- usedLen
: 1);
149 if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
) == 0) {
150 if (usedCharLen
) *usedCharLen
= 0;
151 if (usedByteLen
) *usedByteLen
= 0;
153 CFIndex lastUsedLen
= 0;
155 while ((usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, ++numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
))) lastUsedLen
= usedLen
;
156 if (usedCharLen
) *usedCharLen
= (numChars
- 1);
157 if (usedByteLen
) *usedByteLen
= lastUsedLen
;
160 return kCFStringEncodingInsufficientOutputBufferLength
;
162 return kCFStringEncodingInvalidInputStream
;
165 if (usedCharLen
) *usedCharLen
= numChars
;
166 if (usedByteLen
) *usedByteLen
= usedLen
;
167 return kCFStringEncodingConversionSuccess
;
169 #endif /* DEPLOYMENT_TARGET_WINDOWS */
171 return kCFStringEncodingConverterUnavailable
;
174 CF_PRIVATE CFIndex
__CFStringEncodingPlatformBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
176 #if DEPLOYMENT_TARGET_WINDOWS
180 if ((kCFStringEncodingUTF7
!= encoding
) && (kCFStringEncodingGB_18030_2000
!= encoding
) && (0x0800 != (encoding
& 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
181 dwFlags
|= (flags
& (kCFStringEncodingAllowLossyConversion
|kCFStringEncodingSubstituteCombinings
) ? 0 : MB_ERR_INVALID_CHARS
);
182 dwFlags
|= (flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
) ? MB_COMPOSITE
: MB_PRECOMPOSED
);
185 if ((usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
)) == 0) {
186 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER
) {
189 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding
), &cpInfo
)) {
190 cpInfo
.MaxCharSize
= 1; // Is this right ???
192 if (cpInfo
.MaxCharSize
== 1) {
193 numBytes
= maxCharLen
;
195 usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
);
196 usedLen
-= maxCharLen
;
197 numBytes
= (numBytes
> usedLen
? numBytes
- usedLen
: 1);
199 while ((usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
)) == 0) {
200 if ((--numBytes
) == 0) break;
202 if (usedCharLen
) *usedCharLen
= usedLen
;
203 if (usedByteLen
) *usedByteLen
= numBytes
;
205 return kCFStringEncodingInsufficientOutputBufferLength
;
207 return kCFStringEncodingInvalidInputStream
;
210 if (usedCharLen
) *usedCharLen
= usedLen
;
211 if (usedByteLen
) *usedByteLen
= numBytes
;
212 return kCFStringEncodingConversionSuccess
;
214 #endif /* DEPLOYMENT_TARGET_WINDOWS */
216 return kCFStringEncodingConverterUnavailable
;
219 CF_PRIVATE CFIndex
__CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
221 return (__CFStringEncodingPlatformBytesToUnicode(encoding
, flags
, bytes
, numBytes
, NULL
, NULL
, 0, &usedCharLen
) == kCFStringEncodingConversionSuccess
? usedCharLen
: 0);
224 CF_PRIVATE CFIndex
__CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
226 return (__CFStringEncodingPlatformUnicodeToBytes(encoding
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
) == kCFStringEncodingConversionSuccess
? usedByteLen
: 0);
229 #undef __CFCarbonCore_GetTextEncodingBase0