/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*	CFPlatformConverters.c
	Copyright (c) 1998-2014, Apple Inc. All rights reserved.
	Responsibility: Aki Inoue
*/

#include "CFInternal.h"
#include <CoreFoundation/CFString.h>
#include "CFStringEncodingConverterExt.h"
#include <CoreFoundation/CFStringEncodingExt.h>
#include "CFUniChar.h"
#include "CFUnicodeDecomposition.h"
#include "CFStringEncodingConverterPriv.h"
#include "CFICUConverters.h"

#if DEPLOYMENT_TARGET_WINDOWS
#include <tchar.h>	/* _tcstoul, used when enumerating system code pages */
#endif

39 CF_INLINE
bool __CFIsPlatformConverterAvailable(int encoding
) {
41 #if DEPLOYMENT_TARGET_WINDOWS
42 return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding
)) ? true : false);
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX

/*
 * Placeholder converter record returned for encodings that are served by
 * ICU.  It carries no conversion callbacks (all NULL); only the size hints
 * and the encoding class are populated.
 */
static const CFStringEncodingConverter __CFICUBootstrap = {
    NULL,                           /* toBytes */
    NULL,                           /* toUnicode */
    6,                              /* maxBytesPerChar */
    4,                              /* maxDecomposedCharLen */
    kCFStringEncodingConverterICU,  /* encodingClass */
    NULL,                           /* toBytesLen */
    NULL,                           /* toUnicodeLen */
    NULL,                           /* toBytesFallback */
    NULL,                           /* toUnicodeFallback */
    NULL,                           /* toBytesPrecompose */
    NULL,                           /* isValidCombiningChar */
};
#endif

59 static const CFStringEncodingConverter __CFPlatformBootstrap
= {
60 NULL
/* toBytes */, NULL
/* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
61 kCFStringEncodingConverterPlatformSpecific
/* encodingClass */,
62 NULL
/* toBytesLen */, NULL
/* toUnicodeLen */, NULL
/* toBytesFallback */,
63 NULL
/* toUnicodeFallback */, NULL
/* toBytesPrecompose */, NULL
, /* isValidCombiningChar */
66 CF_PRIVATE
const CFStringEncodingConverter
*__CFStringEncodingGetExternalConverter(uint32_t encoding
) {
68 // we prefer Text Encoding Converter ICU since it's more reliable
69 if (__CFIsPlatformConverterAvailable(encoding
)) {
70 return &__CFPlatformBootstrap
;
72 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
73 if (__CFStringEncodingGetICUName(encoding
)) {
74 return &__CFICUBootstrap
;
81 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
82 CF_PRIVATE CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) {
86 #elif DEPLOYMENT_TARGET_WINDOWS
90 static uint32_t __CFWin32EncodingIndex
= 0;
91 static CFStringEncoding
*__CFWin32EncodingList
= NULL
;
93 static char CALLBACK
__CFWin32EnumCodePageProc(LPTSTR string
) {
94 uint32_t encoding
= CFStringConvertWindowsCodepageToEncoding(_tcstoul(string
, NULL
, 10));
97 if (encoding
!= kCFStringEncodingInvalidId
) { // We list only encodings we know
98 if (__CFWin32EncodingList
) {
99 for (idx
= 0;idx
< (CFIndex
)__CFWin32EncodingIndex
;idx
++) if (__CFWin32EncodingList
[idx
] == encoding
) break;
100 if (idx
!= __CFWin32EncodingIndex
) return true;
101 __CFWin32EncodingList
[__CFWin32EncodingIndex
] = encoding
;
103 ++__CFWin32EncodingIndex
;
108 CF_PRIVATE CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) {
109 CFStringEncoding
*encodings
;
111 EnumSystemCodePages((CODEPAGE_ENUMPROC
)&__CFWin32EnumCodePageProc
, CP_INSTALLED
);
112 __CFWin32EncodingList
= (uint32_t *)CFAllocatorAllocate(allocator
, sizeof(uint32_t) * __CFWin32EncodingIndex
, 0);
113 EnumSystemCodePages((CODEPAGE_ENUMPROC
)&__CFWin32EnumCodePageProc
, CP_INSTALLED
);
115 *numberOfConverters
= __CFWin32EncodingIndex
;
116 encodings
= __CFWin32EncodingList
;
118 __CFWin32EncodingIndex
= 0;
119 __CFWin32EncodingList
= NULL
;
124 CF_PRIVATE CFStringEncoding
*__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator
, CFIndex
*numberOfConverters
) { return NULL
; }
127 CF_PRIVATE CFIndex
__CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
129 #if DEPLOYMENT_TARGET_WINDOWS
133 if ((kCFStringEncodingUTF7
!= encoding
) && (kCFStringEncodingGB_18030_2000
!= encoding
) && (0x0800 != (encoding
& 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
134 dwFlags
|= (flags
& (kCFStringEncodingAllowLossyConversion
|kCFStringEncodingSubstituteCombinings
) ? WC_DEFAULTCHAR
: 0);
135 dwFlags
|= (flags
& kCFStringEncodingComposeCombinings
? WC_COMPOSITECHECK
: 0);
136 dwFlags
|= (flags
& kCFStringEncodingIgnoreCombinings
? WC_DISCARDNS
: 0);
139 if ((usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
)) == 0) {
140 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER
) {
143 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding
), &cpInfo
)) {
144 cpInfo
.MaxCharSize
= 1; // Is this right ???
146 if (cpInfo
.MaxCharSize
== 1) {
147 numChars
= maxByteLen
;
149 usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, NULL
, 0, NULL
, NULL
);
150 usedLen
-= maxByteLen
;
151 numChars
= (numChars
> usedLen
? numChars
- usedLen
: 1);
153 if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
) == 0) {
154 if (usedCharLen
) *usedCharLen
= 0;
155 if (usedByteLen
) *usedByteLen
= 0;
157 CFIndex lastUsedLen
= 0;
159 while ((usedLen
= WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCWSTR
)characters
, ++numChars
, (LPSTR
)bytes
, maxByteLen
, NULL
, NULL
))) lastUsedLen
= usedLen
;
160 if (usedCharLen
) *usedCharLen
= (numChars
- 1);
161 if (usedByteLen
) *usedByteLen
= lastUsedLen
;
164 return kCFStringEncodingInsufficientOutputBufferLength
;
166 return kCFStringEncodingInvalidInputStream
;
169 if (usedCharLen
) *usedCharLen
= numChars
;
170 if (usedByteLen
) *usedByteLen
= usedLen
;
171 return kCFStringEncodingConversionSuccess
;
173 #endif /* DEPLOYMENT_TARGET_WINDOWS */
175 return kCFStringEncodingConverterUnavailable
;
178 CF_PRIVATE CFIndex
__CFStringEncodingPlatformBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
180 #if DEPLOYMENT_TARGET_WINDOWS
184 if ((kCFStringEncodingUTF7
!= encoding
) && (kCFStringEncodingGB_18030_2000
!= encoding
) && (0x0800 != (encoding
& 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
185 dwFlags
|= (flags
& (kCFStringEncodingAllowLossyConversion
|kCFStringEncodingSubstituteCombinings
) ? 0 : MB_ERR_INVALID_CHARS
);
186 dwFlags
|= (flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
) ? MB_COMPOSITE
: MB_PRECOMPOSED
);
189 if ((usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
)) == 0) {
190 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER
) {
193 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding
), &cpInfo
)) {
194 cpInfo
.MaxCharSize
= 1; // Is this right ???
196 if (cpInfo
.MaxCharSize
== 1) {
197 numBytes
= maxCharLen
;
199 usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
);
200 usedLen
-= maxCharLen
;
201 numBytes
= (numBytes
> usedLen
? numBytes
- usedLen
: 1);
203 while ((usedLen
= MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding
), dwFlags
, (LPCSTR
)bytes
, numBytes
, (LPWSTR
)characters
, maxCharLen
)) == 0) {
204 if ((--numBytes
) == 0) break;
206 if (usedCharLen
) *usedCharLen
= usedLen
;
207 if (usedByteLen
) *usedByteLen
= numBytes
;
209 return kCFStringEncodingInsufficientOutputBufferLength
;
211 return kCFStringEncodingInvalidInputStream
;
214 if (usedCharLen
) *usedCharLen
= usedLen
;
215 if (usedByteLen
) *usedByteLen
= numBytes
;
216 return kCFStringEncodingConversionSuccess
;
218 #endif /* DEPLOYMENT_TARGET_WINDOWS */
220 return kCFStringEncodingConverterUnavailable
;
223 CF_PRIVATE CFIndex
__CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
225 return (__CFStringEncodingPlatformBytesToUnicode(encoding
, flags
, bytes
, numBytes
, NULL
, NULL
, 0, &usedCharLen
) == kCFStringEncodingConversionSuccess
? usedCharLen
: 0);
228 CF_PRIVATE CFIndex
__CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
230 return (__CFStringEncodingPlatformUnicodeToBytes(encoding
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
) == kCFStringEncodingConversionSuccess
? usedByteLen
: 0);
/* NOTE(review): no matching #define is visible in this file; #undef of a
 * macro that is not defined is a harmless no-op. */
#undef __CFCarbonCore_GetTextEncodingBase0