]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
bd5b749c | 2 | * Copyright (c) 2008 Apple Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | /* CFStringUtilities.c | |
24 | Copyright 1999-2002, Apple, Inc. All rights reserved. | |
25 | Responsibility: Aki Inoue | |
26 | */ | |
27 | ||
28 | #include "CFInternal.h" | |
29 | #include "CFStringEncodingConverterExt.h" | |
30 | #include "CFUniChar.h" | |
d8925383 | 31 | #include <CoreFoundation/CFStringEncodingExt.h> |
bd5b749c | 32 | #include <CoreFoundation/CFPreferences.h> |
9ce05555 | 33 | #include <limits.h> |
bd5b749c | 34 | #if (DEPLOYMENT_TARGET_MACOSX) || DEPLOYMENT_TARGET_LINUX |
9ce05555 A |
35 | #include <stdlib.h> |
36 | #elif defined(__WIN32__) | |
37 | #include <stdlib.h> | |
38 | #include <tchar.h> | |
39 | #endif | |
40 | ||
41 | ||
9ce05555 A |
42 | Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) { |
43 | switch (theEncoding) { | |
44 | case kCFStringEncodingASCII: // Built-in encodings | |
45 | case kCFStringEncodingMacRoman: | |
9ce05555 A |
46 | case kCFStringEncodingUTF8: |
47 | case kCFStringEncodingNonLossyASCII: | |
48 | case kCFStringEncodingWindowsLatin1: | |
49 | case kCFStringEncodingNextStepLatin: | |
d8925383 A |
50 | case kCFStringEncodingUTF16: |
51 | case kCFStringEncodingUTF16BE: | |
52 | case kCFStringEncodingUTF16LE: | |
53 | case kCFStringEncodingUTF32: | |
54 | case kCFStringEncodingUTF32BE: | |
55 | case kCFStringEncodingUTF32LE: | |
9ce05555 A |
56 | return true; |
57 | ||
58 | default: | |
59 | return CFStringEncodingIsValidEncoding(theEncoding); | |
60 | } | |
61 | } | |
62 | ||
63 | const CFStringEncoding* CFStringGetListOfAvailableEncodings() { | |
bd5b749c | 64 | return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings(); |
9ce05555 A |
65 | } |
66 | ||
67 | CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) { | |
68 | static CFMutableDictionaryRef mappingTable = NULL; | |
bd5b749c | 69 | CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding) : NULL; |
9ce05555 A |
70 | |
71 | if (!theName) { | |
d8925383 A |
72 | switch (theEncoding) { |
73 | case kCFStringEncodingUTF8: theName = CFSTR("Unicode (UTF-8)"); break; | |
74 | case kCFStringEncodingUTF16: theName = CFSTR("Unicode (UTF-16)"); break; | |
75 | case kCFStringEncodingUTF16BE: theName = CFSTR("Unicode (UTF-16BE)"); break; | |
76 | case kCFStringEncodingUTF16LE: theName = CFSTR("Unicode (UTF-16LE)"); break; | |
77 | case kCFStringEncodingUTF32: theName = CFSTR("Unicode (UTF-32)"); break; | |
78 | case kCFStringEncodingUTF32BE: theName = CFSTR("Unicode (UTF-32BE)"); break; | |
79 | case kCFStringEncodingUTF32LE: theName = CFSTR("Unicode (UTF-32LE)"); break; | |
80 | case kCFStringEncodingNonLossyASCII: theName = CFSTR("Non-lossy ASCII"); break; | |
81 | ||
82 | default: { | |
bd5b749c | 83 | const char *encodingName = CFStringEncodingName(theEncoding); |
d8925383 A |
84 | |
85 | if (encodingName) { | |
bd5b749c | 86 | theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII); |
d8925383 | 87 | } |
9ce05555 | 88 | } |
d8925383 A |
89 | break; |
90 | } | |
9ce05555 | 91 | |
d8925383 | 92 | if (theName) { |
bd5b749c | 93 | if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks); |
9ce05555 | 94 | |
bd5b749c | 95 | CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName); |
d8925383 | 96 | CFRelease(theName); |
9ce05555 A |
97 | } |
98 | } | |
99 | ||
100 | return theName; | |
101 | } | |
102 | ||
103 | CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) { | |
104 | static CFMutableDictionaryRef mappingTable = NULL; | |
105 | CFStringEncoding result = kCFStringEncodingInvalidId; | |
d8925383 | 106 | CFMutableStringRef lowerCharsetName; |
9ce05555 A |
107 | |
108 | /* Check for common encodings first */ | |
d8925383 | 109 | if (CFStringCompare(charsetName, CFSTR("utf-8"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) { |
9ce05555 | 110 | return kCFStringEncodingUTF8; |
d8925383 | 111 | } else if (CFStringCompare(charsetName, CFSTR("iso-8859-1"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) { |
9ce05555 | 112 | return kCFStringEncodingISOLatin1; |
9ce05555 A |
113 | } |
114 | ||
d8925383 | 115 | /* Create lowercase copy */ |
bd5b749c | 116 | lowerCharsetName = CFStringCreateMutableCopy(kCFAllocatorSystemDefault, 0, charsetName); |
d8925383 A |
117 | CFStringLowercase(lowerCharsetName, NULL); |
118 | ||
9ce05555 | 119 | if (mappingTable == NULL) { |
bd5b749c | 120 | CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &kCFTypeDictionaryKeyCallBacks, (const CFDictionaryValueCallBacks *)NULL); |
9ce05555 A |
121 | const CFStringEncoding *encodings = CFStringGetListOfAvailableEncodings(); |
122 | ||
123 | while (*encodings != kCFStringEncodingInvalidId) { | |
124 | const char **nameList = CFStringEncodingCanonicalCharsetNames(*encodings); | |
125 | ||
126 | if (nameList) { | |
127 | while (*nameList) { | |
bd5b749c | 128 | CFStringRef name = CFStringCreateWithCString(kCFAllocatorSystemDefault, *nameList++, kCFStringEncodingASCII); |
9ce05555 A |
129 | |
130 | if (name) { | |
bd5b749c | 131 | CFDictionaryAddValue(table, (const void*)name, (const void*)(uintptr_t)*encodings); |
9ce05555 A |
132 | CFRelease(name); |
133 | } | |
134 | } | |
135 | } | |
136 | encodings++; | |
137 | } | |
d8925383 A |
138 | // Adding Unicode names |
139 | CFDictionaryAddValue(table, (const void*)CFSTR("unicode-1-1"), (const void*)kCFStringEncodingUTF16); | |
140 | CFDictionaryAddValue(table, (const void*)CFSTR("iso-10646-ucs-2"), (const void*)kCFStringEncodingUTF16); | |
141 | CFDictionaryAddValue(table, (const void*)CFSTR("utf-16"), (const void*)kCFStringEncodingUTF16); | |
142 | CFDictionaryAddValue(table, (const void*)CFSTR("utf-16be"), (const void*)kCFStringEncodingUTF16BE); | |
143 | CFDictionaryAddValue(table, (const void*)CFSTR("utf-16le"), (const void*)kCFStringEncodingUTF16LE); | |
144 | CFDictionaryAddValue(table, (const void*)CFSTR("utf-32"), (const void*)kCFStringEncodingUTF32); | |
145 | CFDictionaryAddValue(table, (const void*)CFSTR("utf-32be"), (const void*)kCFStringEncodingUTF32BE); | |
146 | CFDictionaryAddValue(table, (const void*)CFSTR("utf-32le"), (const void*)kCFStringEncodingUTF32LE); | |
9ce05555 A |
147 | |
148 | mappingTable = table; | |
149 | } | |
150 | ||
151 | if (CFDictionaryContainsKey(mappingTable, (const void*)lowerCharsetName)) { | |
bd5b749c | 152 | result = (CFStringEncoding)(uintptr_t)CFDictionaryGetValue(mappingTable, (const void*)lowerCharsetName); |
9ce05555 A |
153 | } |
154 | ||
155 | CFRelease(lowerCharsetName); | |
156 | ||
157 | return result; | |
158 | } | |
159 | ||
160 | CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) { | |
161 | static CFMutableDictionaryRef mappingTable = NULL; | |
bd5b749c | 162 | CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)encoding) : NULL; |
9ce05555 A |
163 | |
164 | if (!theName) { | |
d8925383 A |
165 | switch (encoding) { |
166 | case kCFStringEncodingUTF16: theName = CFSTR("UTF-16"); break; | |
167 | case kCFStringEncodingUTF16BE: theName = CFSTR("UTF-16BE"); break; | |
168 | case kCFStringEncodingUTF16LE: theName = CFSTR("UTF-16LE"); break; | |
169 | case kCFStringEncodingUTF32: theName = CFSTR("UTF-32"); break; | |
170 | case kCFStringEncodingUTF32BE: theName = CFSTR("UTF-32BE"); break; | |
171 | case kCFStringEncodingUTF32LE: theName = CFSTR("UTF-32LE"); break; | |
172 | ||
173 | ||
174 | default: { | |
175 | const char **nameList = CFStringEncodingCanonicalCharsetNames(encoding); | |
176 | ||
177 | if (nameList && *nameList) { | |
178 | CFMutableStringRef upperCaseName; | |
179 | ||
bd5b749c | 180 | theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, *nameList, kCFStringEncodingASCII); |
d8925383 | 181 | if (theName) { |
bd5b749c | 182 | upperCaseName = CFStringCreateMutableCopy(kCFAllocatorSystemDefault, 0, theName); |
d8925383 A |
183 | CFStringUppercase(upperCaseName, 0); |
184 | CFRelease(theName); | |
185 | theName = upperCaseName; | |
186 | } | |
9ce05555 A |
187 | } |
188 | } | |
d8925383 A |
189 | break; |
190 | } | |
9ce05555 A |
191 | |
192 | if (theName) { | |
bd5b749c | 193 | if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks); |
9ce05555 | 194 | |
bd5b749c | 195 | CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)encoding, (const void*)theName); |
9ce05555 A |
196 | CFRelease(theName); |
197 | } | |
198 | } | |
199 | ||
200 | return theName; | |
201 | } | |
202 | ||
/*
 * Numeric identifiers on the NSString side of the encoding conversions
 * implemented in this file.
 */
enum {
    NSASCIIStringEncoding         = 1,      /* 0..127 only */
    NSNEXTSTEPStringEncoding      = 2,
    NSJapaneseEUCStringEncoding   = 3,
    NSUTF8StringEncoding          = 4,
    NSISOLatin1StringEncoding     = 5,
    NSSymbolStringEncoding        = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding      = 8,
    NSISOLatin2StringEncoding     = 9,
    NSUnicodeStringEncoding       = 10,
    NSWindowsCP1251StringEncoding = 11,     /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12,     /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13,     /* Greek */
    NSWindowsCP1254StringEncoding = 14,     /* Turkish */
    NSWindowsCP1250StringEncoding = 15,     /* WinLatin2 */
    NSISO2022JPStringEncoding     = 21,     /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding    = 30,

    NSProprietaryStringEncoding   = 65536   /* Installation-specific encoding */
};
224 | ||
/*
 * Bit 31: set on an NSString encoding value that carries a CFStringEncoding
 * with no dedicated NS identifier. Kept unsigned so the shift does not reach
 * the sign bit of int and the mask does not sign-extend when combined with
 * wider unsigned values.
 */
#define NSENCODING_MASK (1U << 31)
226 | ||
bd5b749c | 227 | unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) { |
d8925383 | 228 | switch (theEncoding & 0xFFF) { |
9ce05555 A |
229 | case kCFStringEncodingASCII: return NSASCIIStringEncoding; |
230 | case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding; | |
231 | case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding; | |
232 | case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding; | |
9ce05555 A |
233 | case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding; |
234 | case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding; | |
bd5b749c | 235 | #if DEPLOYMENT_TARGET_MACOSX |
d8925383 A |
236 | case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding; |
237 | case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding; | |
238 | case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding; | |
239 | case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding; | |
240 | case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding; | |
241 | case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding; | |
242 | case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding; | |
243 | case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding; | |
244 | case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding; | |
bd5b749c A |
245 | #endif |
246 | #if DEPLOYMENT_TARGET_MACOSX | |
d8925383 A |
247 | case kCFStringEncodingUnicode: |
248 | if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding; | |
249 | else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding; | |
bd5b749c | 250 | #endif |
d8925383 A |
251 | /* fall-through for other encoding schemes */ |
252 | ||
9ce05555 A |
253 | default: |
254 | return NSENCODING_MASK | theEncoding; | |
255 | } | |
256 | } | |
257 | ||
bd5b749c | 258 | CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) { |
9ce05555 A |
259 | switch (theEncoding) { |
260 | case NSASCIIStringEncoding: return kCFStringEncodingASCII; | |
261 | case NSNEXTSTEPStringEncoding: return kCFStringEncodingNextStepLatin; | |
262 | case NSUTF8StringEncoding: return kCFStringEncodingUTF8; | |
263 | case NSISOLatin1StringEncoding: return kCFStringEncodingISOLatin1; | |
264 | case NSNonLossyASCIIStringEncoding: return kCFStringEncodingNonLossyASCII; | |
d8925383 | 265 | case NSUnicodeStringEncoding: return kCFStringEncodingUTF16; |
9ce05555 A |
266 | case NSWindowsCP1252StringEncoding: return kCFStringEncodingWindowsLatin1; |
267 | case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman; | |
bd5b749c | 268 | #if DEPLOYMENT_TARGET_MACOSX |
d8925383 A |
269 | case NSSymbolStringEncoding: return kCFStringEncodingMacSymbol; |
270 | case NSJapaneseEUCStringEncoding: return kCFStringEncodingEUC_JP; | |
271 | case NSShiftJISStringEncoding: return kCFStringEncodingDOSJapanese; | |
272 | case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP; | |
273 | case NSISOLatin2StringEncoding: return kCFStringEncodingISOLatin2; | |
274 | case NSWindowsCP1251StringEncoding: return kCFStringEncodingWindowsCyrillic; | |
275 | case NSWindowsCP1253StringEncoding: return kCFStringEncodingWindowsGreek; | |
276 | case NSWindowsCP1254StringEncoding: return kCFStringEncodingWindowsLatin5; | |
277 | case NSWindowsCP1250StringEncoding: return kCFStringEncodingWindowsLatin2; | |
bd5b749c | 278 | #endif |
9ce05555 A |
279 | default: |
280 | return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId); | |
281 | } | |
282 | } | |
283 | ||
284 | #define MACCODEPAGE_BASE (10000) | |
285 | #define ISO8859CODEPAGE_BASE (28590) | |
286 | ||
287 | static const uint16_t _CFToDOSCodePageList[] = { | |
288 | 437, -1, -1, -1, -1, 737, 775, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x400 | |
289 | 850, 851, 852, 855, 857, 860, 861, 862, 863, 864, 865, 866, 869, 874, -1, 01, // 0x410 | |
290 | 932, 936, 949 , 950, // 0x420 | |
291 | }; | |
292 | ||
293 | static const uint16_t _CFToWindowsCodePageList[] = { | |
d8925383 A |
294 | 1252, 1250, 1251, 1253, 1254, 1255, 1256, 1257, 1258, |
295 | }; | |
296 | ||
297 | static const uint16_t _CFEUCToCodePage[] = { // 0x900 | |
298 | 51932, 51936, 51950, 51949, | |
9ce05555 A |
299 | }; |
300 | ||
301 | UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) { | |
bd5b749c | 302 | #if DEPLOYMENT_TARGET_MACOSX |
d8925383 A |
303 | CFStringEncoding encodingBase = theEncoding & 0x0FFF; |
304 | #endif | |
305 | ||
306 | switch (theEncoding & 0x0F00) { | |
bd5b749c | 307 | #if DEPLOYMENT_TARGET_MACOSX |
d8925383 A |
308 | case 0: // Mac OS script |
309 | if (encodingBase <= kCFStringEncodingMacCentralEurRoman) { | |
310 | return MACCODEPAGE_BASE + encodingBase; | |
311 | } else if (encodingBase == kCFStringEncodingMacTurkish) { | |
312 | return 10081; | |
313 | } else if (encodingBase == kCFStringEncodingMacCroatian) { | |
314 | return 10082; | |
315 | } else if (encodingBase == kCFStringEncodingMacIcelandic) { | |
316 | return 10079; | |
317 | } | |
318 | break; | |
319 | #endif | |
320 | ||
321 | case 0x100: // Unicode | |
322 | switch (theEncoding) { | |
323 | case kCFStringEncodingUTF8: return 65001; | |
324 | case kCFStringEncodingUTF16: return 1200; | |
325 | case kCFStringEncodingUTF16BE: return 1201; | |
326 | case kCFStringEncodingUTF32: return 65005; | |
327 | case kCFStringEncodingUTF32BE: return 65006; | |
328 | } | |
329 | break; | |
330 | ||
bd5b749c | 331 | #if (DEPLOYMENT_TARGET_MACOSX) |
d8925383 A |
332 | case 0x0200: // ISO 8859 series |
333 | if (encodingBase <= kCFStringEncodingISOLatin10) return ISO8859CODEPAGE_BASE + (encodingBase - 0x200); | |
334 | break; | |
335 | ||
336 | case 0x0400: // DOS codepage | |
337 | if (encodingBase <= kCFStringEncodingDOSChineseTrad) return _CFToDOSCodePageList[encodingBase - 0x400]; | |
338 | break; | |
339 | ||
340 | case 0x0500: // ANSI (Windows) codepage | |
341 | if (encodingBase <= kCFStringEncodingWindowsVietnamese) return _CFToWindowsCodePageList[theEncoding - 0x500]; | |
342 | else if (encodingBase == kCFStringEncodingWindowsKoreanJohab) return 1361; | |
343 | break; | |
344 | ||
345 | case 0x600: // National standards | |
346 | if (encodingBase == kCFStringEncodingASCII) return 20127; | |
347 | else if (encodingBase == kCFStringEncodingGB_18030_2000) return 54936; | |
348 | break; | |
349 | ||
350 | case 0x0800: // ISO 2022 series | |
351 | switch (encodingBase) { | |
352 | case kCFStringEncodingISO_2022_JP: return 50220; | |
353 | case kCFStringEncodingISO_2022_CN: return 50227; | |
354 | case kCFStringEncodingISO_2022_KR: return 50225; | |
355 | } | |
356 | break; | |
357 | ||
358 | case 0x0900: // EUC series | |
359 | if (encodingBase <= kCFStringEncodingEUC_KR) return _CFEUCToCodePage[encodingBase - 0x0900]; | |
360 | break; | |
361 | ||
362 | ||
363 | case 0x0A00: // Misc encodings | |
364 | switch (encodingBase) { | |
365 | case kCFStringEncodingKOI8_R: return 20866; | |
366 | case kCFStringEncodingHZ_GB_2312: return 52936; | |
367 | case kCFStringEncodingKOI8_U: return 21866; | |
368 | } | |
369 | break; | |
370 | ||
371 | case 0x0C00: // IBM EBCDIC encodings | |
372 | if (encodingBase == kCFStringEncodingEBCDIC_CP037) return 37; | |
373 | break; | |
bd5b749c | 374 | #endif |
9ce05555 | 375 | } |
d8925383 | 376 | |
9ce05555 A |
377 | return kCFStringEncodingInvalidId; |
378 | } | |
379 | ||
#if DEPLOYMENT_TARGET_MACOSX
/*
 * Windows codepage -> CFStringEncoding pairs.
 * bsearchEncoding() binary-searches this table, so entries must stay sorted
 * by ascending acp.
 */
static const struct {
    uint16_t acp;
    uint16_t encoding;
} _CFACPToCFTable[] = {
    {37, kCFStringEncodingEBCDIC_CP037},
    {437, kCFStringEncodingDOSLatinUS},
    {737, kCFStringEncodingDOSGreek},
    {775, kCFStringEncodingDOSBalticRim},
    {850, kCFStringEncodingDOSLatin1},
    {851, kCFStringEncodingDOSGreek1},
    {852, kCFStringEncodingDOSLatin2},
    {855, kCFStringEncodingDOSCyrillic},
    {857, kCFStringEncodingDOSTurkish},
    {860, kCFStringEncodingDOSPortuguese},
    {861, kCFStringEncodingDOSIcelandic},
    {862, kCFStringEncodingDOSHebrew},
    {863, kCFStringEncodingDOSCanadianFrench},
    {864, kCFStringEncodingDOSArabic},
    {865, kCFStringEncodingDOSNordic},
    {866, kCFStringEncodingDOSRussian},
    {869, kCFStringEncodingDOSGreek2},
    {874, kCFStringEncodingDOSThai},
    {932, kCFStringEncodingDOSJapanese},
    {936, kCFStringEncodingDOSChineseSimplif},
    {949, kCFStringEncodingDOSKorean},
    {950, kCFStringEncodingDOSChineseTrad},
    {1250, kCFStringEncodingWindowsLatin2},
    {1251, kCFStringEncodingWindowsCyrillic},
    {1252, kCFStringEncodingWindowsLatin1},
    {1253, kCFStringEncodingWindowsGreek},
    {1254, kCFStringEncodingWindowsLatin5},
    {1255, kCFStringEncodingWindowsHebrew},
    {1256, kCFStringEncodingWindowsArabic},
    {1257, kCFStringEncodingWindowsBalticRim},
    {1258, kCFStringEncodingWindowsVietnamese},
    {1361, kCFStringEncodingWindowsKoreanJohab},
    {20127, kCFStringEncodingASCII},
    {20866, kCFStringEncodingKOI8_R},
    {21866, kCFStringEncodingKOI8_U},
    {50220, kCFStringEncodingISO_2022_JP},
    {50225, kCFStringEncodingISO_2022_KR},
    {50227, kCFStringEncodingISO_2022_CN},
    {51932, kCFStringEncodingEUC_JP},
    {51936, kCFStringEncodingEUC_CN},
    {51949, kCFStringEncodingEUC_KR},
    {51950, kCFStringEncodingEUC_TW},
    {52936, kCFStringEncodingHZ_GB_2312},
    {54936, kCFStringEncodingGB_18030_2000},
};

/*
 * Looks up a Windows codepage in _CFACPToCFTable.
 *
 * Returns the paired encoding for an exact match on target and
 * kCFStringEncodingInvalidId (converted to SInt32) otherwise. A codepage that
 * merely falls between two table entries is not a match, and the search never
 * reads outside the table.
 */
static SInt32 bsearchEncoding(uint16_t target) {
    CFIndex low = 0;
    CFIndex high = (CFIndex)(sizeof(_CFACPToCFTable) / sizeof(_CFACPToCFTable[0])) - 1;

    while (low <= high) {
        const CFIndex middle = low + ((high - low) / 2);
        const uint16_t acp = _CFACPToCFTable[middle].acp;

        if (acp == target) return _CFACPToCFTable[middle].encoding;

        if (acp > target) {
            high = middle - 1;
        } else {
            low = middle + 1;
        }
    }
    return (kCFStringEncodingInvalidId);
}
#endif
9ce05555 A |
447 | |
448 | CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) { | |
449 | if (theEncoding == 0 || theEncoding == 1) { // ID for default (system) codepage | |
450 | return CFStringGetSystemEncoding(); | |
d8925383 A |
451 | } else if ((theEncoding >= MACCODEPAGE_BASE) && (theEncoding < 20000)) { // Mac script |
452 | if (theEncoding <= 10029) return theEncoding - MACCODEPAGE_BASE; // up to Mac Central European | |
bd5b749c | 453 | #if (DEPLOYMENT_TARGET_MACOSX) |
d8925383 A |
454 | else if (theEncoding == 10079) return kCFStringEncodingMacIcelandic; |
455 | else if (theEncoding == 10081) return kCFStringEncodingMacTurkish; | |
456 | else if (theEncoding == 10082) return kCFStringEncodingMacCroatian; | |
457 | #endif | |
458 | } else if ((theEncoding >= ISO8859CODEPAGE_BASE) && (theEncoding <= 28605)) { // ISO 8859 | |
9ce05555 | 459 | return (theEncoding - ISO8859CODEPAGE_BASE) + 0x200; |
d8925383 A |
460 | } else if (theEncoding == 65001) { // UTF-8 |
461 | return kCFStringEncodingUTF8; | |
462 | } else if (theEncoding == 12000) { // UTF-16 | |
463 | return kCFStringEncodingUTF16; | |
464 | } else if (theEncoding == 12001) { // UTF-16BE | |
465 | return kCFStringEncodingUTF16BE; | |
466 | } else if (theEncoding == 65005) { // UTF-32 | |
467 | return kCFStringEncodingUTF32; | |
468 | } else if (theEncoding == 65006) { // UTF-32BE | |
469 | return kCFStringEncodingUTF32BE; | |
9ce05555 | 470 | } else { |
bd5b749c | 471 | #if DEPLOYMENT_TARGET_MACOSX |
d8925383 A |
472 | return bsearchEncoding(theEncoding); |
473 | #endif | |
9ce05555 | 474 | } |
d8925383 A |
475 | |
476 | return kCFStringEncodingInvalidId; | |
9ce05555 A |
477 | } |
478 | ||
479 | CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) { | |
480 | CFStringEncoding macEncoding; | |
481 | ||
482 | macEncoding = CFStringEncodingGetScriptCodeForEncoding(encoding); | |
483 | ||
484 | return macEncoding; | |
485 | } | |
486 | ||
d8925383 | 487 |