]> git.saurik.com Git - apple/cf.git/blob - CFPlatformConverters.c
CF-1153.18.tar.gz
[apple/cf.git] / CFPlatformConverters.c
1 /*
2 * Copyright (c) 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFPlatformConverters.c
25 Copyright (c) 1998-2014, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFString.h>
31 #include "CFStringEncodingConverterExt.h"
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFUniChar.h"
34 #include "CFUnicodeDecomposition.h"
35 #include "CFStringEncodingConverterPriv.h"
36 #include "CFICUConverters.h"
37
38
39 CF_INLINE bool __CFIsPlatformConverterAvailable(int encoding) {
40
41 #if DEPLOYMENT_TARGET_WINDOWS
42 return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding)) ? true : false);
43 #else
44 return false;
45 #endif
46 }
47
48 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
49
50 static const CFStringEncodingConverter __CFICUBootstrap = {
51 NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
52 kCFStringEncodingConverterICU /* encodingClass */,
53 NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */,
54 NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */
55 };
56
57 #endif
58
59 static const CFStringEncodingConverter __CFPlatformBootstrap = {
60 NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
61 kCFStringEncodingConverterPlatformSpecific /* encodingClass */,
62 NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */,
63 NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */
64 };
65
66 CF_PRIVATE const CFStringEncodingConverter *__CFStringEncodingGetExternalConverter(uint32_t encoding) {
67
68 // we prefer Text Encoding Converter ICU since it's more reliable
69 if (__CFIsPlatformConverterAvailable(encoding)) {
70 return &__CFPlatformBootstrap;
71 } else {
72 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
73 if (__CFStringEncodingGetICUName(encoding)) {
74 return &__CFICUBootstrap;
75 }
76 #endif
77 return NULL;
78 }
79 }
80
81 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
82 CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) {
83
84 return NULL;
85 }
86 #elif DEPLOYMENT_TARGET_WINDOWS
87
88 #include <tchar.h>
89
90 static uint32_t __CFWin32EncodingIndex = 0;
91 static CFStringEncoding *__CFWin32EncodingList = NULL;
92
93 static char CALLBACK __CFWin32EnumCodePageProc(LPTSTR string) {
94 uint32_t encoding = CFStringConvertWindowsCodepageToEncoding(_tcstoul(string, NULL, 10));
95 CFIndex idx;
96
97 if (encoding != kCFStringEncodingInvalidId) { // We list only encodings we know
98 if (__CFWin32EncodingList) {
99 for (idx = 0;idx < (CFIndex)__CFWin32EncodingIndex;idx++) if (__CFWin32EncodingList[idx] == encoding) break;
100 if (idx != __CFWin32EncodingIndex) return true;
101 __CFWin32EncodingList[__CFWin32EncodingIndex] = encoding;
102 }
103 ++__CFWin32EncodingIndex;
104 }
105 return true;
106 }
107
108 CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) {
109 CFStringEncoding *encodings;
110
111 EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED);
112 __CFWin32EncodingList = (uint32_t *)CFAllocatorAllocate(allocator, sizeof(uint32_t) * __CFWin32EncodingIndex, 0);
113 EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED);
114
115 *numberOfConverters = __CFWin32EncodingIndex;
116 encodings = __CFWin32EncodingList;
117
118 __CFWin32EncodingIndex = 0;
119 __CFWin32EncodingList = NULL;
120
121 return encodings;
122 }
123 #else
124 CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { return NULL; }
125 #endif
126
127 CF_PRIVATE CFIndex __CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
128
129 #if DEPLOYMENT_TARGET_WINDOWS
130 WORD dwFlags = 0;
131 CFIndex usedLen;
132
133 if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
134 dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? WC_DEFAULTCHAR : 0);
135 dwFlags |= (flags & kCFStringEncodingComposeCombinings ? WC_COMPOSITECHECK : 0);
136 dwFlags |= (flags & kCFStringEncodingIgnoreCombinings ? WC_DISCARDNS : 0);
137 }
138
139 if ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL)) == 0) {
140 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
141 CPINFO cpInfo;
142
143 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) {
144 cpInfo.MaxCharSize = 1; // Is this right ???
145 }
146 if (cpInfo.MaxCharSize == 1) {
147 numChars = maxByteLen;
148 } else {
149 usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, NULL, 0, NULL, NULL);
150 usedLen -= maxByteLen;
151 numChars = (numChars > usedLen ? numChars - usedLen : 1);
152 }
153 if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL) == 0) {
154 if (usedCharLen) *usedCharLen = 0;
155 if (usedByteLen) *usedByteLen = 0;
156 } else {
157 CFIndex lastUsedLen = 0;
158
159 while ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, ++numChars, (LPSTR)bytes, maxByteLen, NULL, NULL))) lastUsedLen = usedLen;
160 if (usedCharLen) *usedCharLen = (numChars - 1);
161 if (usedByteLen) *usedByteLen = lastUsedLen;
162 }
163
164 return kCFStringEncodingInsufficientOutputBufferLength;
165 } else {
166 return kCFStringEncodingInvalidInputStream;
167 }
168 } else {
169 if (usedCharLen) *usedCharLen = numChars;
170 if (usedByteLen) *usedByteLen = usedLen;
171 return kCFStringEncodingConversionSuccess;
172 }
173 #endif /* DEPLOYMENT_TARGET_WINDOWS */
174
175 return kCFStringEncodingConverterUnavailable;
176 }
177
178 CF_PRIVATE CFIndex __CFStringEncodingPlatformBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
179
180 #if DEPLOYMENT_TARGET_WINDOWS
181 WORD dwFlags = 0;
182 CFIndex usedLen;
183
184 if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
185 dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? 0 : MB_ERR_INVALID_CHARS);
186 dwFlags |= (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? MB_COMPOSITE : MB_PRECOMPOSED);
187 }
188
189 if ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) {
190 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
191 CPINFO cpInfo;
192
193 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) {
194 cpInfo.MaxCharSize = 1; // Is this right ???
195 }
196 if (cpInfo.MaxCharSize == 1) {
197 numBytes = maxCharLen;
198 } else {
199 usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen);
200 usedLen -= maxCharLen;
201 numBytes = (numBytes > usedLen ? numBytes - usedLen : 1);
202 }
203 while ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) {
204 if ((--numBytes) == 0) break;
205 }
206 if (usedCharLen) *usedCharLen = usedLen;
207 if (usedByteLen) *usedByteLen = numBytes;
208
209 return kCFStringEncodingInsufficientOutputBufferLength;
210 } else {
211 return kCFStringEncodingInvalidInputStream;
212 }
213 } else {
214 if (usedCharLen) *usedCharLen = usedLen;
215 if (usedByteLen) *usedByteLen = numBytes;
216 return kCFStringEncodingConversionSuccess;
217 }
218 #endif /* DEPLOYMENT_TARGET_WINDOWS */
219
220 return kCFStringEncodingConverterUnavailable;
221 }
222
223 CF_PRIVATE CFIndex __CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
224 CFIndex usedCharLen;
225 return (__CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, NULL, NULL, 0, &usedCharLen) == kCFStringEncodingConversionSuccess ? usedCharLen : 0);
226 }
227
228 CF_PRIVATE CFIndex __CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
229 CFIndex usedByteLen;
230 return (__CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &usedByteLen) == kCFStringEncodingConversionSuccess ? usedByteLen : 0);
231 }
232
233 #undef __CFCarbonCore_GetTextEncodingBase0
234