]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodingDatabase.c
715edd5e1e9e4587e0db2b3bb42fb191a4c8d581
[apple/cf.git] / CFStringEncodingDatabase.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * CFStringEncodingDatabase.c
25 * CoreFoundation
26 *
27 * Created by Aki Inoue on 07/12/05.
28 * Copyright 2007-2009, Apple Inc. All rights reserved.
29 *
30 */
31
32 #include "CFInternal.h"
33 #include <CoreFoundation/CFStringEncodingExt.h>
34 #include "CFStringEncodingConverterPriv.h"
35 #include "CFStringEncodingDatabase.h"
36 #include <stdio.h>
37
/*
 * Windows builds: the two library names used later in this file are mapped
 * onto their MSVC runtime spellings.
 */
#if DEPLOYMENT_TARGET_WINDOWS
/* The fourth (locale) argument is discarded; _strnicmp takes no locale. */
#define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c)
#define snprintf _snprintf
#endif

/*
 * Offset between an ISO 8859 part number and its Windows code page:
 * code page = ISO8859CODEPAGE_BASE + part number.
 */
#define ISO8859CODEPAGE_BASE (28590)
44
45 static const uint16_t __CFKnownEncodingList[] = {
46 kCFStringEncodingMacRoman,
47 kCFStringEncodingMacJapanese,
48 kCFStringEncodingMacChineseTrad,
49 kCFStringEncodingMacKorean,
50 kCFStringEncodingMacArabic,
51 kCFStringEncodingMacHebrew,
52 kCFStringEncodingMacGreek,
53 kCFStringEncodingMacCyrillic,
54 kCFStringEncodingMacDevanagari,
55 kCFStringEncodingMacGurmukhi,
56 kCFStringEncodingMacGujarati,
57 kCFStringEncodingMacOriya,
58 kCFStringEncodingMacBengali,
59 kCFStringEncodingMacTamil,
60 kCFStringEncodingMacTelugu,
61 kCFStringEncodingMacKannada,
62 kCFStringEncodingMacMalayalam,
63 kCFStringEncodingMacSinhalese,
64 kCFStringEncodingMacBurmese,
65 kCFStringEncodingMacKhmer,
66 kCFStringEncodingMacThai,
67 kCFStringEncodingMacLaotian,
68 kCFStringEncodingMacGeorgian,
69 kCFStringEncodingMacArmenian,
70 kCFStringEncodingMacChineseSimp,
71 kCFStringEncodingMacTibetan,
72 kCFStringEncodingMacMongolian,
73 kCFStringEncodingMacEthiopic,
74 kCFStringEncodingMacCentralEurRoman,
75 kCFStringEncodingMacVietnamese,
76 kCFStringEncodingMacSymbol,
77 kCFStringEncodingMacDingbats,
78 kCFStringEncodingMacTurkish,
79 kCFStringEncodingMacCroatian,
80 kCFStringEncodingMacIcelandic,
81 kCFStringEncodingMacRomanian,
82 kCFStringEncodingMacCeltic,
83 kCFStringEncodingMacGaelic,
84 kCFStringEncodingMacFarsi,
85 kCFStringEncodingMacUkrainian,
86 kCFStringEncodingMacInuit,
87
88 kCFStringEncodingDOSLatinUS,
89 kCFStringEncodingDOSGreek,
90 kCFStringEncodingDOSBalticRim,
91 kCFStringEncodingDOSLatin1,
92 kCFStringEncodingDOSGreek1,
93 kCFStringEncodingDOSLatin2,
94 kCFStringEncodingDOSCyrillic,
95 kCFStringEncodingDOSTurkish,
96 kCFStringEncodingDOSPortuguese,
97 kCFStringEncodingDOSIcelandic,
98 kCFStringEncodingDOSHebrew,
99 kCFStringEncodingDOSCanadianFrench,
100 kCFStringEncodingDOSArabic,
101 kCFStringEncodingDOSNordic,
102 kCFStringEncodingDOSRussian,
103 kCFStringEncodingDOSGreek2,
104 kCFStringEncodingDOSThai,
105 kCFStringEncodingDOSJapanese,
106 kCFStringEncodingDOSChineseSimplif,
107 kCFStringEncodingDOSKorean,
108 kCFStringEncodingDOSChineseTrad,
109
110 kCFStringEncodingWindowsLatin1,
111 kCFStringEncodingWindowsLatin2,
112 kCFStringEncodingWindowsCyrillic,
113 kCFStringEncodingWindowsGreek,
114 kCFStringEncodingWindowsLatin5,
115 kCFStringEncodingWindowsHebrew,
116 kCFStringEncodingWindowsArabic,
117 kCFStringEncodingWindowsBalticRim,
118 kCFStringEncodingWindowsVietnamese,
119 kCFStringEncodingWindowsKoreanJohab,
120 kCFStringEncodingASCII,
121
122 kCFStringEncodingShiftJIS_X0213,
123 kCFStringEncodingGB_18030_2000,
124
125 kCFStringEncodingISO_2022_JP,
126 kCFStringEncodingISO_2022_JP_2,
127 kCFStringEncodingISO_2022_JP_1,
128 kCFStringEncodingISO_2022_JP_3,
129 kCFStringEncodingISO_2022_CN,
130 kCFStringEncodingISO_2022_CN_EXT,
131 kCFStringEncodingISO_2022_KR,
132 kCFStringEncodingEUC_JP,
133 kCFStringEncodingEUC_CN,
134 kCFStringEncodingEUC_TW,
135 kCFStringEncodingEUC_KR,
136
137 kCFStringEncodingShiftJIS,
138
139 kCFStringEncodingKOI8_R,
140
141 kCFStringEncodingBig5,
142
143 kCFStringEncodingMacRomanLatin1,
144 kCFStringEncodingHZ_GB_2312,
145 kCFStringEncodingBig5_HKSCS_1999,
146 kCFStringEncodingVISCII,
147 kCFStringEncodingKOI8_U,
148 kCFStringEncodingBig5_E,
149 kCFStringEncodingUTF7_IMAP,
150
151 kCFStringEncodingNextStepLatin,
152
153 kCFStringEncodingEBCDIC_CP037
154 };
155
/*
 * Windows code page for each entry of __CFKnownEncodingList (same index).
 * A 0 means no code page is recorded for that encoding; the reverse lookup
 * in __CFStringEncodingGetFromWindowsCodePage() skips those entries.
 */
static const uint16_t __CFWindowsCPList[] = {
    /* [0..40] Mac OS script encodings */
    10000, 10001, 10002, 10003, 10004, 10005, 10006, 10007, /* Roman .. Cyrillic */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                     /* Devanagari .. Khmer */
    10021,                                                  /* Thai */
    0, 0, 0,                                                /* Laotian, Georgian, Armenian */
    10008,                                                  /* ChineseSimp */
    0, 0, 0,                                                /* Tibetan, Mongolian, Ethiopic */
    10029,                                                  /* CentralEurRoman */
    0, 0, 0,                                                /* Vietnamese, Symbol, Dingbats */
    10081, 10082, 10079, 10010,                             /* Turkish, Croatian, Icelandic, Romanian */
    0, 0, 0,                                                /* Celtic, Gaelic, Farsi */
    10017,                                                  /* Ukrainian */
    0,                                                      /* Inuit */

    /* [41..61] DOS code pages */
    437, 737, 775, 850, 851, 852, 855,
    857, 860, 861, 862, 863, 864, 865,
    866, 869, 874, 932, 936, 949, 950,

    /* [62..71] Windows code pages */
    1252, 1250, 1251, 1253, 1254,
    1255, 1256, 1257, 1258, 1361,

    /* [72] ASCII */
    20127,

    /* [73..74] ShiftJIS_X0213, GB_18030_2000 */
    0, 54936,

    /*
     * [75..81] ISO 2022 family (JP, JP_2, JP_1, JP_3, CN, CN_EXT, KR).
     * 50221 is used for ISO-2022-JP because that is what the CF converter
     * generates.  NOTE(review): the original comment said this was preferred
     * "over 50220/50221"; presumably 50220/50222 was meant -- confirm.
     */
    50221, 0, 0, 0, 50227, 0, 50225,

    /* [82..85] EUC_JP, EUC_CN, EUC_TW, EUC_KR */
    51932, 51936, 51950, 51949,

    /* [86] ShiftJIS, [87] KOI8_R, [88] Big5 */
    0, 20866, 0,

    /* [89..95] MacRomanLatin1, HZ_GB_2312, Big5_HKSCS_1999, VISCII, KOI8_U, Big5_E, UTF7_IMAP */
    0, 52936, 0, 0, 21866, 0, 0,

    /* [96] NextStepLatin, [97] EBCDIC_CP037 */
    0, 37
};
269
/*
 * Canonical name for each entry of __CFKnownEncodingList (same index).
 * NULL marks an encoding whose name is not taken from this table: the DOS
 * and Windows entries are named from their code page number by
 * __CFStringEncodingGetCanonicalName(), and the remaining NULL entries have
 * no canonical name here.  For Mac OS script encodings other than
 * "macintosh" (and for "roman-latin1") the caller prepends "x-mac-".
 *
 * The array itself is const as well as the strings: nothing ever rewrites
 * the pointers.
 */
static const char *const __CFCanonicalNameList[] = {
    /* [0..40] Mac OS script encodings */
    "macintosh",
    "japanese",
    "trad-chinese",
    "korean",
    "arabic",
    "hebrew",
    "greek",
    "cyrillic",
    "devanagari",
    "gurmukhi",
    "gujarati",
    "oriya",
    "bengali",
    "tamil",
    "telugu",
    "kannada",
    "malayalam",
    "sinhalese",
    "burmese",
    "khmer",
    "thai",
    "laotian",
    "georgian",
    "armenian",
    "simp-chinese",
    "tibetan",
    "mongolian",
    "ethiopic",
    "centraleurroman",
    "vietnamese",
    "symbol",
    "dingbats",
    "turkish",
    "croatian",
    "icelandic",
    "romanian",
    "celtic",
    "gaelic",
    "farsi",
    "ukrainian",
    "inuit",

    /* [41..61] DOS code pages: named "cp<N>" from __CFWindowsCPList */
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,

    /* [62..71] Windows code pages: named "windows-<N>" from __CFWindowsCPList */
    NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL,

    /* [72] ASCII */
    "us-ascii",

    /* [73..74] ShiftJIS_X0213, GB_18030_2000 */
    NULL,
    "gb18030",

    /* [75..85] ISO 2022 and EUC families */
    "iso-2022-jp",
    "iso-2022-jp-2",
    "iso-2022-jp-1",
    "iso-2022-jp-3",
    "iso-2022-cn",
    "iso-2022-cn-ext",
    "iso-2022-kr",
    "euc-jp",
    "gb2312",
    "euc-tw",
    "euc-kr",

    /* [86..88] ShiftJIS, KOI8_R, Big5 */
    "shift_jis",
    "koi8-r",
    "big5",

    /* [89..95] MacRomanLatin1 .. UTF7_IMAP */
    "roman-latin1",
    "hz-gb-2312",
    "big5-hkscs",
    "viscii",
    "koi8-u",
    NULL,
    "utf7-imap",

    /* [96] NextStepLatin */
    "x-nextstep",

    /* [97] EBCDIC_CP037 */
    "ibm037",
};
382
383 static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) {
384 const uint16_t *head = __CFKnownEncodingList;
385 const uint16_t *tail = head + ((sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList)) - 1);
386 const uint16_t *middle;
387
388 encoding &= 0x0FFF;
389 while (head <= tail) {
390 middle = head + ((tail - head) >> 1);
391
392 if (encoding == *middle) {
393 return middle - __CFKnownEncodingList;
394 } else if (encoding < *middle) {
395 tail = middle - 1;
396 } else {
397 head = middle + 1;
398 }
399 }
400
401 return kCFNotFound;
402 }
403
404 __private_extern__ uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) {
405 CFStringEncoding encodingBase = encoding & 0x0F00;
406
407 if (0x0100 == encodingBase) { // UTF
408 switch (encoding) {
409 case kCFStringEncodingUTF7: return 65000;
410 case kCFStringEncodingUTF8: return 65001;
411 case kCFStringEncodingUTF16: return 1200;
412 case kCFStringEncodingUTF16BE: return 1201;
413 case kCFStringEncodingUTF32: return 65005;
414 case kCFStringEncodingUTF32BE: return 65006;
415 }
416 } else if (0x0200 == encodingBase) { // ISO 8859 range
417 return ISO8859CODEPAGE_BASE + (encoding & 0xFF);
418 } else { // others
419 CFIndex index = __CFGetEncodingIndex(encoding);
420
421 if (kCFNotFound != index) return __CFWindowsCPList[index];
422 }
423
424 return 0;
425 }
426
427 __private_extern__ CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) {
428 switch (codepage) {
429 case 65001: return kCFStringEncodingUTF8;
430 case 1200: return kCFStringEncodingUTF16;
431 case 0: return kCFStringEncodingInvalidId;
432 case 1201: return kCFStringEncodingUTF16BE;
433 case 65005: return kCFStringEncodingUTF32;
434 case 65006: return kCFStringEncodingUTF32BE;
435 case 65000: return kCFStringEncodingUTF7;
436 }
437
438 if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) {
439 return (codepage - ISO8859CODEPAGE_BASE) + 0x0200;
440 } else {
441 static CFMutableDictionaryRef mappingTable = NULL;
442 static CFSpinLock_t lock = CFSpinLockInit;
443 uintptr_t value;
444
445 __CFSpinLock(&lock);
446 if (NULL == mappingTable) {
447 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
448
449 mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
450
451 for (index = 0;index < count;index++) {
452 if (0 != __CFWindowsCPList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFWindowsCPList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
453 }
454 }
455 __CFSpinUnlock(&lock);
456
457 if (CFDictionaryGetValueIfPresent(mappingTable, (const void *)(uintptr_t)codepage, (const void **)&value)) return (CFStringEncoding)value;
458 }
459
460
461 return kCFStringEncodingInvalidId;
462 }
463
464 __private_extern__ bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) {
465 const char *format = "%s";
466 const char *name = NULL;
467 uint32_t value = 0;
468 CFIndex index;
469
470 switch (encoding & 0x0F00) {
471 case 0x0100: // UTF range
472 switch (encoding) {
473 case kCFStringEncodingUTF7: name = "utf-7"; break;
474 case kCFStringEncodingUTF8: name = "utf-8"; break;
475 case kCFStringEncodingUTF16: name = "utf-16"; break;
476 case kCFStringEncodingUTF16BE: name = "utf-16be"; break;
477 case kCFStringEncodingUTF16LE: name = "utf-16le"; break;
478 case kCFStringEncodingUTF32: name = "utf-32"; break;
479 case kCFStringEncodingUTF32BE: name = "utf-32be"; break;
480 case kCFStringEncodingUTF32LE: name = "utf-32le"; break;
481 }
482 break;
483
484 case 0x0200: // ISO 8859 range
485 format = "iso-8859-%d";
486 value = (encoding & 0xFF);
487 break;
488
489 case 0x0400: // DOS code page range
490 case 0x0500: // Windows code page range
491 index = __CFGetEncodingIndex(encoding);
492
493 if (kCFNotFound != index) {
494 value = __CFWindowsCPList[index];
495 if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d");
496 }
497 break;
498
499 default: // others
500 index = __CFGetEncodingIndex(encoding);
501
502 if (kCFNotFound != index) {
503 if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s";
504 name = (const char *)__CFCanonicalNameList[index];
505 }
506 break;
507 }
508
509 if ((0 == value) && (NULL == name)) {
510 return false;
511 } else if (0 != value) {
512 return ((snprintf(buffer, bufferSize, format, value) < bufferSize) ? true : false);
513 } else {
514 return ((snprintf(buffer, bufferSize, format, name) < bufferSize) ? true : false);
515 }
516 }
517
518 #define LENGTH_LIMIT (256)
519 static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) { return ((0 == strncasecmp_l((const char *)value1, (const char *)value2, LENGTH_LIMIT, NULL)) ? true : false); }
520
521 static CFHashCode __CFCanonicalNameHash(const void *value) {
522 const char *name = (const char *)value;
523 CFHashCode code = 0;
524
525 while ((0 != *name) && ((name - (const char *)value) < LENGTH_LIMIT)) {
526 char character = *(name++);
527
528 code += (character + (((character >= 'A') && (character <= 'Z')) ? 'a' - 'A' : 0));
529 }
530
531 return code * (name - (const char *)value);
532 }
533
534 __private_extern__ CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) {
535 CFStringEncoding encoding;
536 CFIndex prefixLength;
537 static CFMutableDictionaryRef mappingTable = NULL;
538 static CFSpinLock_t lock = CFSpinLockInit;
539
540 prefixLength = strlen("iso-8859-");
541 if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO
542 encoding = strtol(canonicalName + prefixLength, NULL, 10);
543
544 return (((0 == encoding) || (encoding > 16)) ? kCFStringEncodingInvalidId : encoding + 0x0200);
545 }
546
547 prefixLength = strlen("cp");
548 if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS
549 encoding = strtol(canonicalName + prefixLength, NULL, 10);
550
551 return __CFStringEncodingGetFromWindowsCodePage(encoding);
552 }
553
554 prefixLength = strlen("windows-");
555 if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do DOS
556 encoding = strtol(canonicalName + prefixLength, NULL, 10);
557
558 return __CFStringEncodingGetFromWindowsCodePage(encoding);
559 }
560
561 __CFSpinLock(&lock);
562 if (NULL == mappingTable) {
563 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
564
565 CFDictionaryKeyCallBacks keys = {
566 0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash
567 };
568
569 mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL);
570
571 // Add UTFs
572 CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7);
573 CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8);
574 CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16);
575 CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE);
576 CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE);
577 CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32);
578 CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE);
579 CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE);
580
581 for (index = 0;index < count;index++) {
582 if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
583 }
584 }
585 __CFSpinUnlock(&lock);
586
587 if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman;
588
589
590 prefixLength = strlen("x-mac-");
591 encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0));
592
593 return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding);
594 }
595 #undef LENGTH_LIMIT
596
597 #if DEPLOYMENT_TARGET_MACOSX
598 // This list indexes from DOS range
599 static uint16_t __CFISO8859SimilarScriptList[] = {
600 kCFStringEncodingMacRoman,
601 kCFStringEncodingMacCentralEurRoman,
602 kCFStringEncodingMacRoman,
603 kCFStringEncodingMacCentralEurRoman,
604 kCFStringEncodingMacCyrillic,
605 kCFStringEncodingMacArabic,
606 kCFStringEncodingMacGreek,
607 kCFStringEncodingMacHebrew,
608 kCFStringEncodingMacTurkish,
609 kCFStringEncodingMacInuit,
610 kCFStringEncodingMacThai,
611 kCFStringEncodingMacRoman,
612 kCFStringEncodingMacCentralEurRoman,
613 kCFStringEncodingMacCeltic,
614 kCFStringEncodingMacRoman,
615 kCFStringEncodingMacRomanian};
616
617 static uint16_t __CFOtherSimilarScriptList[] = {
618 kCFStringEncodingMacRoman,
619 kCFStringEncodingMacGreek,
620 kCFStringEncodingMacCentralEurRoman,
621 kCFStringEncodingMacRoman,
622 kCFStringEncodingMacGreek,
623 kCFStringEncodingMacCentralEurRoman,
624 kCFStringEncodingMacCyrillic,
625 kCFStringEncodingMacTurkish,
626 kCFStringEncodingMacRoman,
627 kCFStringEncodingMacIcelandic,
628 kCFStringEncodingMacHebrew,
629 kCFStringEncodingMacRoman,
630 kCFStringEncodingMacArabic,
631 kCFStringEncodingMacInuit,
632 kCFStringEncodingMacCyrillic,
633 kCFStringEncodingMacGreek,
634 kCFStringEncodingMacThai,
635 kCFStringEncodingMacJapanese,
636 kCFStringEncodingMacChineseSimp,
637 kCFStringEncodingMacKorean,
638 kCFStringEncodingMacChineseTrad,
639
640 kCFStringEncodingMacRoman,
641 kCFStringEncodingMacCentralEurRoman,
642 kCFStringEncodingMacCyrillic,
643 kCFStringEncodingMacGreek,
644 kCFStringEncodingMacTurkish,
645 kCFStringEncodingMacHebrew,
646 kCFStringEncodingMacArabic,
647 kCFStringEncodingMacCentralEurRoman,
648 kCFStringEncodingMacVietnamese,
649 kCFStringEncodingMacKorean,
650
651 kCFStringEncodingMacRoman,
652
653 kCFStringEncodingMacJapanese,
654 kCFStringEncodingMacChineseSimp,
655
656 kCFStringEncodingMacJapanese,
657 kCFStringEncodingMacJapanese,
658 kCFStringEncodingMacJapanese,
659 kCFStringEncodingMacJapanese,
660 kCFStringEncodingMacChineseSimp,
661 kCFStringEncodingMacChineseSimp,
662 kCFStringEncodingMacKorean,
663 kCFStringEncodingMacJapanese,
664 kCFStringEncodingMacChineseSimp,
665 kCFStringEncodingMacChineseTrad,
666 kCFStringEncodingMacKorean,
667
668 kCFStringEncodingMacJapanese,
669
670 kCFStringEncodingMacCyrillic,
671
672 kCFStringEncodingMacChineseTrad,
673
674 kCFStringEncodingMacRoman,
675 kCFStringEncodingMacChineseSimp,
676 kCFStringEncodingMacChineseTrad,
677 kCFStringEncodingMacVietnamese,
678 kCFStringEncodingMacUkrainian,
679 kCFStringEncodingMacChineseTrad,
680 kCFStringEncodingMacRoman,
681
682 kCFStringEncodingMacRoman,
683
684 kCFStringEncodingMacRoman
685 };
686
/*
 * Display names for the ISO 8859 encodings, indexed by (part number - 1);
 * see __CFStringEncodingGetName().  The NULL slot (part 12) has no name.
 *
 * The array itself is const as well as the strings: nothing ever rewrites
 * the pointers.
 */
static const char *const __CFISONameList[] = {
    "Western (ISO Latin 1)",          /* ISO 8859-1 */
    "Central European (ISO Latin 2)", /* ISO 8859-2 */
    "Western (ISO Latin 3)",          /* ISO 8859-3 */
    "Central European (ISO Latin 4)", /* ISO 8859-4 */
    "Cyrillic (ISO 8859-5)",
    "Arabic (ISO 8859-6)",
    "Greek (ISO 8859-7)",
    "Hebrew (ISO 8859-8)",
    "Turkish (ISO Latin 5)",          /* ISO 8859-9 */
    "Nordic (ISO Latin 6)",           /* ISO 8859-10 */
    "Thai (ISO 8859-11)",
    NULL,                             /* ISO 8859-12 */
    "Baltic (ISO Latin 7)",           /* ISO 8859-13 */
    "Celtic (ISO Latin 8)",           /* ISO 8859-14 */
    "Western (ISO Latin 9)",          /* ISO 8859-15 */
    "Romanian (ISO Latin 10)",        /* ISO 8859-16 */
};
705
/*
 * Display name for each entry of __CFKnownEncodingList (same index); see
 * __CFStringEncodingGetName().  NULL marks an encoding without a name.
 *
 * The array itself is const as well as the strings: nothing ever rewrites
 * the pointers.
 */
static const char *const __CFOtherNameList[] = {
    /* [0..40] Mac OS script encodings */
    "Western (Mac OS Roman)",
    "Japanese (Mac OS)",
    "Traditional Chinese (Mac OS)",
    "Korean (Mac OS)",
    "Arabic (Mac OS)",
    "Hebrew (Mac OS)",
    "Greek (Mac OS)",
    "Cyrillic (Mac OS)",
    "Devanagari (Mac OS)",
    "Gurmukhi (Mac OS)",
    "Gujarati (Mac OS)",
    "Oriya (Mac OS)",
    "Bengali (Mac OS)",
    "Tamil (Mac OS)",
    "Telugu (Mac OS)",
    "Kannada (Mac OS)",
    "Malayalam (Mac OS)",
    "Sinhalese (Mac OS)",
    "Burmese (Mac OS)",
    "Khmer (Mac OS)",
    "Thai (Mac OS)",
    "Laotian (Mac OS)",
    "Georgian (Mac OS)",
    "Armenian (Mac OS)",
    "Simplified Chinese (Mac OS)",
    "Tibetan (Mac OS)",
    "Mongolian (Mac OS)",
    "Ethiopic (Mac OS)",
    "Central European (Mac OS)",
    "Vietnamese (Mac OS)",
    "Symbol (Mac OS)",
    "Dingbats (Mac OS)",
    "Turkish (Mac OS)",
    "Croatian (Mac OS)",
    "Icelandic (Mac OS)",
    "Romanian (Mac OS)",
    "Celtic (Mac OS)",
    "Gaelic (Mac OS)",
    "Farsi (Mac OS)",
    "Cyrillic (Mac OS Ukrainian)",
    "Inuit (Mac OS)",

    /* [41..61] DOS code pages */
    "Latin-US (DOS)",
    "Greek (DOS)",
    "Baltic (DOS)",
    "Western (DOS Latin 1)",
    "Greek (DOS Greek 1)",
    "Central European (DOS Latin 2)",
    "Cyrillic (DOS)",
    "Turkish (DOS)",
    "Portuguese (DOS)",
    "Icelandic (DOS)",
    "Hebrew (DOS)",
    "Canadian French (DOS)",
    "Arabic (DOS)",
    "Nordic (DOS)",
    "Russian (DOS)",
    "Greek (DOS Greek 2)",
    "Thai (Windows, DOS)",
    "Japanese (Windows, DOS)",
    "Simplified Chinese (Windows, DOS)",
    "Korean (Windows, DOS)",
    "Traditional Chinese (Windows, DOS)",

    /* [62..71] Windows code pages */
    "Western (Windows Latin 1)",
    "Central European (Windows Latin 2)",
    "Cyrillic (Windows)",
    "Greek (Windows)",
    "Turkish (Windows Latin 5)",
    "Hebrew (Windows)",
    "Arabic (Windows)",
    "Baltic (Windows)",
    "Vietnamese (Windows)",
    "Korean (Windows Johab)",

    /* [72] ASCII */
    "Western (ASCII)",

    /* [73..74] ShiftJIS_X0213, GB_18030_2000 */
    "Japanese (Shift JIS X0213)",
    "Chinese (GB 18030)",

    /* [75..85] ISO 2022 and EUC families */
    "Japanese (ISO 2022-JP)",
    "Japanese (ISO 2022-JP-2)",
    "Japanese (ISO 2022-JP-1)",
    "Japanese (ISO 2022-JP-3)",
    "Chinese (ISO 2022-CN)",
    "Chinese (ISO 2022-CN-EXT)",
    "Korean (ISO 2022-KR)",
    "Japanese (EUC)",
    "Simplified Chinese (GB 2312)",
    "Traditional Chinese (EUC)",
    "Korean (EUC)",

    /* [86..88] ShiftJIS, KOI8_R, Big5 */
    "Japanese (Shift JIS)",
    "Cyrillic (KOI8-R)",
    "Traditional Chinese (Big 5)",

    /* [89..95] MacRomanLatin1 .. UTF7_IMAP */
    "Western (Mac Mail)",
    "Simplified Chinese (HZ GB 2312)",
    "Traditional Chinese (Big 5 HKSCS)",
    NULL,
    "Ukrainian (KOI8-U)",
    "Traditional Chinese (Big 5-E)",
    NULL,

    /* [96] NextStepLatin, [97] EBCDIC_CP037 */
    "Western (NextStep)",
    "Western (EBCDIC Latin 1)",
};
806 #endif /* DEPLOYMENT_TARGET_MACOSX */
807
808 __private_extern__ CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) {
809 #if DEPLOYMENT_TARGET_MACOSX
810 switch (encoding & 0x0F00) {
811 case 0: return encoding & 0xFF; break; // Mac scripts
812
813 case 0x0100: return kCFStringEncodingUnicode; break; // Unicode
814
815 case 0x200: // ISO 8859
816 return (((encoding & 0xFF) <= (sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList))) ? __CFISO8859SimilarScriptList[(encoding & 0xFF) - 1] : kCFStringEncodingInvalidId);
817 break;
818
819 default: {
820 CFIndex index = __CFGetEncodingIndex(encoding);
821
822 if (kCFNotFound != index) {
823 index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS);
824 return __CFOtherSimilarScriptList[index];
825 }
826 }
827 }
828 #endif /* DEPLOYMENT_TARGET_MACOSX */
829
830 return kCFStringEncodingInvalidId;
831 }
832
833 __private_extern__ const char *__CFStringEncodingGetName(CFStringEncoding encoding) {
834 switch (encoding) {
835 case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break;
836 case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break;
837 case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break;
838 case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break;
839 case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break;
840 case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break;
841 case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break;
842 case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break;
843 case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break;
844 }
845
846 #if DEPLOYMENT_TARGET_MACOSX
847 if (0x0200 == (encoding & 0x0F00)) {
848 encoding &= 0x00FF;
849
850 if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1];
851 } else {
852 CFIndex index = __CFGetEncodingIndex(encoding);
853
854 if (kCFNotFound != index) return __CFOtherNameList[index];
855 }
856 #endif /* DEPLOYMENT_TARGET_MACOSX */
857
858 return NULL;
859 }