]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodingDatabase.c
CF-550.13.tar.gz
[apple/cf.git] / CFStringEncodingDatabase.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * CFStringEncodingDatabase.c
26 * CoreFoundation
27 *
28 * Created by Aki Inoue on 07/12/05.
29 * Copyright 2007-2009, Apple Inc. All rights reserved.
30 *
31 */
32
33 #include "CFInternal.h"
34 #include <CoreFoundation/CFStringEncodingExt.h>
35 #include "CFStringEncodingConverterPriv.h"
36 #include "CFStringEncodingDatabase.h"
37 #include <stdio.h>
38
39 #if DEPLOYMENT_TARGET_WINDOWS
40 #define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c)
41 #define snprintf _snprintf
42 #endif
43
44 #define ISO8859CODEPAGE_BASE (28590)
45
46 static const uint16_t __CFKnownEncodingList[] = {
47 kCFStringEncodingMacRoman,
48 kCFStringEncodingMacJapanese,
49 kCFStringEncodingMacChineseTrad,
50 kCFStringEncodingMacKorean,
51 kCFStringEncodingMacArabic,
52 kCFStringEncodingMacHebrew,
53 kCFStringEncodingMacGreek,
54 kCFStringEncodingMacCyrillic,
55 kCFStringEncodingMacDevanagari,
56 kCFStringEncodingMacGurmukhi,
57 kCFStringEncodingMacGujarati,
58 kCFStringEncodingMacOriya,
59 kCFStringEncodingMacBengali,
60 kCFStringEncodingMacTamil,
61 kCFStringEncodingMacTelugu,
62 kCFStringEncodingMacKannada,
63 kCFStringEncodingMacMalayalam,
64 kCFStringEncodingMacSinhalese,
65 kCFStringEncodingMacBurmese,
66 kCFStringEncodingMacKhmer,
67 kCFStringEncodingMacThai,
68 kCFStringEncodingMacLaotian,
69 kCFStringEncodingMacGeorgian,
70 kCFStringEncodingMacArmenian,
71 kCFStringEncodingMacChineseSimp,
72 kCFStringEncodingMacTibetan,
73 kCFStringEncodingMacMongolian,
74 kCFStringEncodingMacEthiopic,
75 kCFStringEncodingMacCentralEurRoman,
76 kCFStringEncodingMacVietnamese,
77 kCFStringEncodingMacSymbol,
78 kCFStringEncodingMacDingbats,
79 kCFStringEncodingMacTurkish,
80 kCFStringEncodingMacCroatian,
81 kCFStringEncodingMacIcelandic,
82 kCFStringEncodingMacRomanian,
83 kCFStringEncodingMacCeltic,
84 kCFStringEncodingMacGaelic,
85 kCFStringEncodingMacFarsi,
86 kCFStringEncodingMacUkrainian,
87 kCFStringEncodingMacInuit,
88
89 kCFStringEncodingDOSLatinUS,
90 kCFStringEncodingDOSGreek,
91 kCFStringEncodingDOSBalticRim,
92 kCFStringEncodingDOSLatin1,
93 kCFStringEncodingDOSGreek1,
94 kCFStringEncodingDOSLatin2,
95 kCFStringEncodingDOSCyrillic,
96 kCFStringEncodingDOSTurkish,
97 kCFStringEncodingDOSPortuguese,
98 kCFStringEncodingDOSIcelandic,
99 kCFStringEncodingDOSHebrew,
100 kCFStringEncodingDOSCanadianFrench,
101 kCFStringEncodingDOSArabic,
102 kCFStringEncodingDOSNordic,
103 kCFStringEncodingDOSRussian,
104 kCFStringEncodingDOSGreek2,
105 kCFStringEncodingDOSThai,
106 kCFStringEncodingDOSJapanese,
107 kCFStringEncodingDOSChineseSimplif,
108 kCFStringEncodingDOSKorean,
109 kCFStringEncodingDOSChineseTrad,
110
111 kCFStringEncodingWindowsLatin1,
112 kCFStringEncodingWindowsLatin2,
113 kCFStringEncodingWindowsCyrillic,
114 kCFStringEncodingWindowsGreek,
115 kCFStringEncodingWindowsLatin5,
116 kCFStringEncodingWindowsHebrew,
117 kCFStringEncodingWindowsArabic,
118 kCFStringEncodingWindowsBalticRim,
119 kCFStringEncodingWindowsVietnamese,
120 kCFStringEncodingWindowsKoreanJohab,
121 kCFStringEncodingASCII,
122
123 kCFStringEncodingShiftJIS_X0213,
124 kCFStringEncodingGB_18030_2000,
125
126 kCFStringEncodingISO_2022_JP,
127 kCFStringEncodingISO_2022_JP_2,
128 kCFStringEncodingISO_2022_JP_1,
129 kCFStringEncodingISO_2022_JP_3,
130 kCFStringEncodingISO_2022_CN,
131 kCFStringEncodingISO_2022_CN_EXT,
132 kCFStringEncodingISO_2022_KR,
133 kCFStringEncodingEUC_JP,
134 kCFStringEncodingEUC_CN,
135 kCFStringEncodingEUC_TW,
136 kCFStringEncodingEUC_KR,
137
138 kCFStringEncodingShiftJIS,
139
140 kCFStringEncodingKOI8_R,
141
142 kCFStringEncodingBig5,
143
144 kCFStringEncodingMacRomanLatin1,
145 kCFStringEncodingHZ_GB_2312,
146 kCFStringEncodingBig5_HKSCS_1999,
147 kCFStringEncodingVISCII,
148 kCFStringEncodingKOI8_U,
149 kCFStringEncodingBig5_E,
150 kCFStringEncodingUTF7_IMAP,
151
152 kCFStringEncodingNextStepLatin,
153
154 kCFStringEncodingEBCDIC_CP037
155 };
156
/*
 * Windows code page for each entry of __CFKnownEncodingList (same index).
 * A value of 0 means the encoding has no code page recorded here.
 */
static const uint16_t __CFWindowsCPList[] = {
    /* Mac OS script encodings */
    10000,  /* MacRoman */
    10001,  /* MacJapanese */
    10002,  /* MacChineseTrad */
    10003,  /* MacKorean */
    10004,  /* MacArabic */
    10005,  /* MacHebrew */
    10006,  /* MacGreek */
    10007,  /* MacCyrillic */
    0,      /* MacDevanagari */
    0,      /* MacGurmukhi */
    0,      /* MacGujarati */
    0,      /* MacOriya */
    0,      /* MacBengali */
    0,      /* MacTamil */
    0,      /* MacTelugu */
    0,      /* MacKannada */
    0,      /* MacMalayalam */
    0,      /* MacSinhalese */
    0,      /* MacBurmese */
    0,      /* MacKhmer */
    10021,  /* MacThai */
    0,      /* MacLaotian */
    0,      /* MacGeorgian */
    0,      /* MacArmenian */
    10008,  /* MacChineseSimp */
    0,      /* MacTibetan */
    0,      /* MacMongolian */
    0,      /* MacEthiopic */
    10029,  /* MacCentralEurRoman */
    0,      /* MacVietnamese */
    0,      /* MacSymbol */
    0,      /* MacDingbats */
    10081,  /* MacTurkish */
    10082,  /* MacCroatian */
    10079,  /* MacIcelandic */
    10010,  /* MacRomanian */
    0,      /* MacCeltic */
    0,      /* MacGaelic */
    0,      /* MacFarsi */
    10017,  /* MacUkrainian */
    0,      /* MacInuit */

    /* DOS code pages */
    437,    /* DOSLatinUS */
    737,    /* DOSGreek */
    775,    /* DOSBalticRim */
    850,    /* DOSLatin1 */
    851,    /* DOSGreek1 */
    852,    /* DOSLatin2 */
    855,    /* DOSCyrillic */
    857,    /* DOSTurkish */
    860,    /* DOSPortuguese */
    861,    /* DOSIcelandic */
    862,    /* DOSHebrew */
    863,    /* DOSCanadianFrench */
    864,    /* DOSArabic */
    865,    /* DOSNordic */
    866,    /* DOSRussian */
    869,    /* DOSGreek2 */
    874,    /* DOSThai */
    932,    /* DOSJapanese */
    936,    /* DOSChineseSimplif */
    949,    /* DOSKorean */
    950,    /* DOSChineseTrad */

    /* Windows code pages */
    1252,   /* WindowsLatin1 */
    1250,   /* WindowsLatin2 */
    1251,   /* WindowsCyrillic */
    1253,   /* WindowsGreek */
    1254,   /* WindowsLatin5 */
    1255,   /* WindowsHebrew */
    1256,   /* WindowsArabic */
    1257,   /* WindowsBalticRim */
    1258,   /* WindowsVietnamese */
    1361,   /* WindowsKoreanJohab */

    20127,  /* ASCII */

    0,      /* ShiftJIS_X0213 */
    54936,  /* GB_18030_2000 */

    /* ISO 2022 family */
    50221,  /* ISO_2022_JP: we prefer this over the other ISO-2022-JP code pages
               (presumably 50220/50222) since that's what the CF converter generates */
    0,      /* ISO_2022_JP_2 */
    0,      /* ISO_2022_JP_1 */
    0,      /* ISO_2022_JP_3 */
    50227,  /* ISO_2022_CN */
    0,      /* ISO_2022_CN_EXT */
    50225,  /* ISO_2022_KR */

    /* EUC family */
    51932,  /* EUC_JP */
    51936,  /* EUC_CN */
    51950,  /* EUC_TW */
    51949,  /* EUC_KR */

    0,      /* ShiftJIS */

    20866,  /* KOI8_R */

    0,      /* Big5 */

    0,      /* MacRomanLatin1 */
    52936,  /* HZ_GB_2312 */
    0,      /* Big5_HKSCS_1999 */
    0,      /* VISCII */
    21866,  /* KOI8_U */
    0,      /* Big5_E */
    0,      /* UTF7_IMAP */

    0,      /* NextStepLatin */

    37      /* EBCDIC_CP037 */
};
270
/*
 * Canonical (charset-style) name for each entry of __CFKnownEncodingList
 * (same index).  NULL means no name is stored here: DOS and Windows code
 * pages get their names generated from __CFWindowsCPList as "cp%d" /
 * "windows-%d" by __CFStringEncodingGetCanonicalName().  Mac OS script names
 * other than "macintosh" (and "roman-latin1") are emitted with an "x-mac-"
 * prefix by that same function.
 *
 * Both the strings and the pointer table are read-only.
 */
static const char *const __CFCanonicalNameList[] = {
    /* Mac OS script encodings */
    "macintosh",
    "japanese",
    "trad-chinese",
    "korean",
    "arabic",
    "hebrew",
    "greek",
    "cyrillic",
    "devanagari",
    "gurmukhi",
    "gujarati",
    "oriya",
    "bengali",
    "tamil",
    "telugu",
    "kannada",
    "malayalam",
    "sinhalese",
    "burmese",
    "khmer",
    "thai",
    "laotian",
    "georgian",
    "armenian",
    "simp-chinese",
    "tibetan",
    "mongolian",
    "ethiopic",
    "centraleurroman",
    "vietnamese",
    "symbol",
    "dingbats",
    "turkish",
    "croatian",
    "icelandic",
    "romanian",
    "celtic",
    "gaelic",
    "farsi",
    "ukrainian",
    "inuit",

    /* DOS code pages (names are generated, 21 entries) */
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,

    /* Windows code pages (names are generated, 10 entries) */
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,

    "us-ascii",

    NULL,               /* ShiftJIS_X0213 */
    "gb18030",

    "iso-2022-jp",
    "iso-2022-jp-2",
    "iso-2022-jp-1",
    "iso-2022-jp-3",
    "iso-2022-cn",
    "iso-2022-cn-ext",
    "iso-2022-kr",
    "euc-jp",
    "gb2312",
    "euc-tw",
    "euc-kr",

    "shift_jis",

    "koi8-r",

    "big5",

    "roman-latin1",
    "hz-gb-2312",
    "big5-hkscs",
    "viscii",
    "koi8-u",
    NULL,               /* Big5_E */
    "utf7-imap",

    "x-nextstep",

    "ibm037",
};
383
384 static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) {
385 const uint16_t *head = __CFKnownEncodingList;
386 const uint16_t *tail = head + ((sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList)) - 1);
387 const uint16_t *middle;
388
389 encoding &= 0x0FFF;
390 while (head <= tail) {
391 middle = head + ((tail - head) >> 1);
392
393 if (encoding == *middle) {
394 return middle - __CFKnownEncodingList;
395 } else if (encoding < *middle) {
396 tail = middle - 1;
397 } else {
398 head = middle + 1;
399 }
400 }
401
402 return kCFNotFound;
403 }
404
405 __private_extern__ uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) {
406 CFStringEncoding encodingBase = encoding & 0x0F00;
407
408 if (0x0100 == encodingBase) { // UTF
409 switch (encoding) {
410 case kCFStringEncodingUTF7: return 65000;
411 case kCFStringEncodingUTF8: return 65001;
412 case kCFStringEncodingUTF16: return 1200;
413 case kCFStringEncodingUTF16BE: return 1201;
414 case kCFStringEncodingUTF32: return 65005;
415 case kCFStringEncodingUTF32BE: return 65006;
416 }
417 } else if (0x0200 == encodingBase) { // ISO 8859 range
418 return ISO8859CODEPAGE_BASE + (encoding & 0xFF);
419 } else { // others
420 CFIndex index = __CFGetEncodingIndex(encoding);
421
422 if (kCFNotFound != index) return __CFWindowsCPList[index];
423 }
424
425 return 0;
426 }
427
428 __private_extern__ CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) {
429 switch (codepage) {
430 case 65001: return kCFStringEncodingUTF8;
431 case 1200: return kCFStringEncodingUTF16;
432 case 0: return kCFStringEncodingInvalidId;
433 case 1201: return kCFStringEncodingUTF16BE;
434 case 65005: return kCFStringEncodingUTF32;
435 case 65006: return kCFStringEncodingUTF32BE;
436 case 65000: return kCFStringEncodingUTF7;
437 }
438
439 if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) {
440 return (codepage - ISO8859CODEPAGE_BASE) + 0x0200;
441 } else {
442 static CFMutableDictionaryRef mappingTable = NULL;
443 static CFSpinLock_t lock = CFSpinLockInit;
444 uintptr_t value;
445
446 __CFSpinLock(&lock);
447 if (NULL == mappingTable) {
448 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
449
450 mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
451
452 for (index = 0;index < count;index++) {
453 if (0 != __CFWindowsCPList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFWindowsCPList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
454 }
455 }
456 __CFSpinUnlock(&lock);
457
458 if (CFDictionaryGetValueIfPresent(mappingTable, (const void *)(uintptr_t)codepage, (const void **)&value)) return (CFStringEncoding)value;
459 }
460
461
462 return kCFStringEncodingInvalidId;
463 }
464
465 __private_extern__ bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) {
466 const char *format = "%s";
467 const char *name = NULL;
468 uint32_t value = 0;
469 CFIndex index;
470
471 switch (encoding & 0x0F00) {
472 case 0x0100: // UTF range
473 switch (encoding) {
474 case kCFStringEncodingUTF7: name = "utf-7"; break;
475 case kCFStringEncodingUTF8: name = "utf-8"; break;
476 case kCFStringEncodingUTF16: name = "utf-16"; break;
477 case kCFStringEncodingUTF16BE: name = "utf-16be"; break;
478 case kCFStringEncodingUTF16LE: name = "utf-16le"; break;
479 case kCFStringEncodingUTF32: name = "utf-32"; break;
480 case kCFStringEncodingUTF32BE: name = "utf-32be"; break;
481 case kCFStringEncodingUTF32LE: name = "utf-32le"; break;
482 }
483 break;
484
485 case 0x0200: // ISO 8859 range
486 format = "iso-8859-%d";
487 value = (encoding & 0xFF);
488 break;
489
490 case 0x0400: // DOS code page range
491 case 0x0500: // Windows code page range
492 index = __CFGetEncodingIndex(encoding);
493
494 if (kCFNotFound != index) {
495 value = __CFWindowsCPList[index];
496 if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d");
497 }
498 break;
499
500 default: // others
501 index = __CFGetEncodingIndex(encoding);
502
503 if (kCFNotFound != index) {
504 if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s";
505 name = (const char *)__CFCanonicalNameList[index];
506 }
507 break;
508 }
509
510 if ((0 == value) && (NULL == name)) {
511 return false;
512 } else if (0 != value) {
513 return ((snprintf(buffer, bufferSize, format, value) < bufferSize) ? true : false);
514 } else {
515 return ((snprintf(buffer, bufferSize, format, name) < bufferSize) ? true : false);
516 }
517 }
518
519 #define LENGTH_LIMIT (256)
520 static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) { return ((0 == strncasecmp_l((const char *)value1, (const char *)value2, LENGTH_LIMIT, NULL)) ? true : false); }
521
522 static CFHashCode __CFCanonicalNameHash(const void *value) {
523 const char *name = (const char *)value;
524 CFHashCode code = 0;
525
526 while ((0 != *name) && ((name - (const char *)value) < LENGTH_LIMIT)) {
527 char character = *(name++);
528
529 code += (character + (((character >= 'A') && (character <= 'Z')) ? 'a' - 'A' : 0));
530 }
531
532 return code * (name - (const char *)value);
533 }
534
535 __private_extern__ CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) {
536 CFStringEncoding encoding;
537 CFIndex prefixLength;
538 static CFMutableDictionaryRef mappingTable = NULL;
539 static CFSpinLock_t lock = CFSpinLockInit;
540
541 prefixLength = strlen("iso-8859-");
542 if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO
543 encoding = strtol(canonicalName + prefixLength, NULL, 10);
544
545 return (((0 == encoding) || (encoding > 16)) ? kCFStringEncodingInvalidId : encoding + 0x0200);
546 }
547
548 prefixLength = strlen("cp");
549 if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS
550 encoding = strtol(canonicalName + prefixLength, NULL, 10);
551
552 return __CFStringEncodingGetFromWindowsCodePage(encoding);
553 }
554
555 prefixLength = strlen("windows-");
556 if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do DOS
557 encoding = strtol(canonicalName + prefixLength, NULL, 10);
558
559 return __CFStringEncodingGetFromWindowsCodePage(encoding);
560 }
561
562 __CFSpinLock(&lock);
563 if (NULL == mappingTable) {
564 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
565
566 CFDictionaryKeyCallBacks keys = {
567 0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash
568 };
569
570 mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL);
571
572 // Add UTFs
573 CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7);
574 CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8);
575 CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16);
576 CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE);
577 CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE);
578 CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32);
579 CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE);
580 CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE);
581
582 for (index = 0;index < count;index++) {
583 if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
584 }
585 }
586 __CFSpinUnlock(&lock);
587
588 if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman;
589
590
591 prefixLength = strlen("x-mac-");
592 encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0));
593
594 return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding);
595 }
596 #undef LENGTH_LIMIT
597
598 #if DEPLOYMENT_TARGET_MACOSX
599 // This list indexes from DOS range
600 static uint16_t __CFISO8859SimilarScriptList[] = {
601 kCFStringEncodingMacRoman,
602 kCFStringEncodingMacCentralEurRoman,
603 kCFStringEncodingMacRoman,
604 kCFStringEncodingMacCentralEurRoman,
605 kCFStringEncodingMacCyrillic,
606 kCFStringEncodingMacArabic,
607 kCFStringEncodingMacGreek,
608 kCFStringEncodingMacHebrew,
609 kCFStringEncodingMacTurkish,
610 kCFStringEncodingMacInuit,
611 kCFStringEncodingMacThai,
612 kCFStringEncodingMacRoman,
613 kCFStringEncodingMacCentralEurRoman,
614 kCFStringEncodingMacCeltic,
615 kCFStringEncodingMacRoman,
616 kCFStringEncodingMacRomanian};
617
618 static uint16_t __CFOtherSimilarScriptList[] = {
619 kCFStringEncodingMacRoman,
620 kCFStringEncodingMacGreek,
621 kCFStringEncodingMacCentralEurRoman,
622 kCFStringEncodingMacRoman,
623 kCFStringEncodingMacGreek,
624 kCFStringEncodingMacCentralEurRoman,
625 kCFStringEncodingMacCyrillic,
626 kCFStringEncodingMacTurkish,
627 kCFStringEncodingMacRoman,
628 kCFStringEncodingMacIcelandic,
629 kCFStringEncodingMacHebrew,
630 kCFStringEncodingMacRoman,
631 kCFStringEncodingMacArabic,
632 kCFStringEncodingMacInuit,
633 kCFStringEncodingMacCyrillic,
634 kCFStringEncodingMacGreek,
635 kCFStringEncodingMacThai,
636 kCFStringEncodingMacJapanese,
637 kCFStringEncodingMacChineseSimp,
638 kCFStringEncodingMacKorean,
639 kCFStringEncodingMacChineseTrad,
640
641 kCFStringEncodingMacRoman,
642 kCFStringEncodingMacCentralEurRoman,
643 kCFStringEncodingMacCyrillic,
644 kCFStringEncodingMacGreek,
645 kCFStringEncodingMacTurkish,
646 kCFStringEncodingMacHebrew,
647 kCFStringEncodingMacArabic,
648 kCFStringEncodingMacCentralEurRoman,
649 kCFStringEncodingMacVietnamese,
650 kCFStringEncodingMacKorean,
651
652 kCFStringEncodingMacRoman,
653
654 kCFStringEncodingMacJapanese,
655 kCFStringEncodingMacChineseSimp,
656
657 kCFStringEncodingMacJapanese,
658 kCFStringEncodingMacJapanese,
659 kCFStringEncodingMacJapanese,
660 kCFStringEncodingMacJapanese,
661 kCFStringEncodingMacChineseSimp,
662 kCFStringEncodingMacChineseSimp,
663 kCFStringEncodingMacKorean,
664 kCFStringEncodingMacJapanese,
665 kCFStringEncodingMacChineseSimp,
666 kCFStringEncodingMacChineseTrad,
667 kCFStringEncodingMacKorean,
668
669 kCFStringEncodingMacJapanese,
670
671 kCFStringEncodingMacCyrillic,
672
673 kCFStringEncodingMacChineseTrad,
674
675 kCFStringEncodingMacRoman,
676 kCFStringEncodingMacChineseSimp,
677 kCFStringEncodingMacChineseTrad,
678 kCFStringEncodingMacVietnamese,
679 kCFStringEncodingMacUkrainian,
680 kCFStringEncodingMacChineseTrad,
681 kCFStringEncodingMacRoman,
682
683 kCFStringEncodingMacRoman,
684
685 kCFStringEncodingMacRoman
686 };
687
/*
 * Human-readable names for the ISO 8859 encodings, indexed by
 * (part number - 1).  NULL marks a part with no name (ISO 8859-12).
 * Both the strings and the pointer table are read-only.
 */
static const char *const __CFISONameList[] = {
    "Western (ISO Latin 1)",
    "Central European (ISO Latin 2)",
    "Western (ISO Latin 3)",
    "Central European (ISO Latin 4)",
    "Cyrillic (ISO 8859-5)",
    "Arabic (ISO 8859-6)",
    "Greek (ISO 8859-7)",
    "Hebrew (ISO 8859-8)",
    "Turkish (ISO Latin 5)",
    "Nordic (ISO Latin 6)",
    "Thai (ISO 8859-11)",
    NULL,
    "Baltic (ISO Latin 7)",
    "Celtic (ISO Latin 8)",
    "Western (ISO Latin 9)",
    "Romanian (ISO Latin 10)",
};
706
/*
 * Human-readable name for each entry of __CFKnownEncodingList (same index).
 * NULL marks an encoding with no display name.
 * Both the strings and the pointer table are read-only.
 */
static const char *const __CFOtherNameList[] = {
    /* Mac OS script encodings */
    "Western (Mac OS Roman)",
    "Japanese (Mac OS)",
    "Traditional Chinese (Mac OS)",
    "Korean (Mac OS)",
    "Arabic (Mac OS)",
    "Hebrew (Mac OS)",
    "Greek (Mac OS)",
    "Cyrillic (Mac OS)",
    "Devanagari (Mac OS)",
    "Gurmukhi (Mac OS)",
    "Gujarati (Mac OS)",
    "Oriya (Mac OS)",
    "Bengali (Mac OS)",
    "Tamil (Mac OS)",
    "Telugu (Mac OS)",
    "Kannada (Mac OS)",
    "Malayalam (Mac OS)",
    "Sinhalese (Mac OS)",
    "Burmese (Mac OS)",
    "Khmer (Mac OS)",
    "Thai (Mac OS)",
    "Laotian (Mac OS)",
    "Georgian (Mac OS)",
    "Armenian (Mac OS)",
    "Simplified Chinese (Mac OS)",
    "Tibetan (Mac OS)",
    "Mongolian (Mac OS)",
    "Ethiopic (Mac OS)",
    "Central European (Mac OS)",
    "Vietnamese (Mac OS)",
    "Symbol (Mac OS)",
    "Dingbats (Mac OS)",
    "Turkish (Mac OS)",
    "Croatian (Mac OS)",
    "Icelandic (Mac OS)",
    "Romanian (Mac OS)",
    "Celtic (Mac OS)",
    "Gaelic (Mac OS)",
    "Farsi (Mac OS)",
    "Cyrillic (Mac OS Ukrainian)",
    "Inuit (Mac OS)",

    /* DOS code pages */
    "Latin-US (DOS)",
    "Greek (DOS)",
    "Baltic (DOS)",
    "Western (DOS Latin 1)",
    "Greek (DOS Greek 1)",
    "Central European (DOS Latin 2)",
    "Cyrillic (DOS)",
    "Turkish (DOS)",
    "Portuguese (DOS)",
    "Icelandic (DOS)",
    "Hebrew (DOS)",
    "Canadian French (DOS)",
    "Arabic (DOS)",
    "Nordic (DOS)",
    "Russian (DOS)",
    "Greek (DOS Greek 2)",
    "Thai (Windows, DOS)",
    "Japanese (Windows, DOS)",
    "Simplified Chinese (Windows, DOS)",
    "Korean (Windows, DOS)",
    "Traditional Chinese (Windows, DOS)",

    /* Windows code pages, then ASCII */
    "Western (Windows Latin 1)",
    "Central European (Windows Latin 2)",
    "Cyrillic (Windows)",
    "Greek (Windows)",
    "Turkish (Windows Latin 5)",
    "Hebrew (Windows)",
    "Arabic (Windows)",
    "Baltic (Windows)",
    "Vietnamese (Windows)",
    "Korean (Windows Johab)",
    "Western (ASCII)",

    "Japanese (Shift JIS X0213)",
    "Chinese (GB 18030)",

    /* ISO 2022 and EUC families */
    "Japanese (ISO 2022-JP)",
    "Japanese (ISO 2022-JP-2)",
    "Japanese (ISO 2022-JP-1)",
    "Japanese (ISO 2022-JP-3)",
    "Chinese (ISO 2022-CN)",
    "Chinese (ISO 2022-CN-EXT)",
    "Korean (ISO 2022-KR)",
    "Japanese (EUC)",
    "Simplified Chinese (GB 2312)",
    "Traditional Chinese (EUC)",
    "Korean (EUC)",

    /* Miscellaneous standards */
    "Japanese (Shift JIS)",
    "Cyrillic (KOI8-R)",
    "Traditional Chinese (Big 5)",
    "Western (Mac Mail)",
    "Simplified Chinese (HZ GB 2312)",
    "Traditional Chinese (Big 5 HKSCS)",
    NULL,               /* VISCII */
    "Ukrainian (KOI8-U)",
    "Traditional Chinese (Big 5-E)",
    NULL,               /* UTF7_IMAP */
    "Western (NextStep)",
    "Western (EBCDIC Latin 1)",
};
807 #endif /* DEPLOYMENT_TARGET_MACOSX */
808
809 __private_extern__ CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) {
810 #if DEPLOYMENT_TARGET_MACOSX
811 switch (encoding & 0x0F00) {
812 case 0: return encoding & 0xFF; break; // Mac scripts
813
814 case 0x0100: return kCFStringEncodingUnicode; break; // Unicode
815
816 case 0x200: // ISO 8859
817 return (((encoding & 0xFF) <= (sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList))) ? __CFISO8859SimilarScriptList[(encoding & 0xFF) - 1] : kCFStringEncodingInvalidId);
818 break;
819
820 default: {
821 CFIndex index = __CFGetEncodingIndex(encoding);
822
823 if (kCFNotFound != index) {
824 index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS);
825 return __CFOtherSimilarScriptList[index];
826 }
827 }
828 }
829 #endif /* DEPLOYMENT_TARGET_MACOSX */
830
831 return kCFStringEncodingInvalidId;
832 }
833
834 __private_extern__ const char *__CFStringEncodingGetName(CFStringEncoding encoding) {
835 switch (encoding) {
836 case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break;
837 case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break;
838 case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break;
839 case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break;
840 case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break;
841 case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break;
842 case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break;
843 case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break;
844 case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break;
845 }
846
847 #if DEPLOYMENT_TARGET_MACOSX
848 if (0x0200 == (encoding & 0x0F00)) {
849 encoding &= 0x00FF;
850
851 if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1];
852 } else {
853 CFIndex index = __CFGetEncodingIndex(encoding);
854
855 if (kCFNotFound != index) return __CFOtherNameList[index];
856 }
857 #endif /* DEPLOYMENT_TARGET_MACOSX */
858
859 return NULL;
860 }