]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodingDatabase.c
CF-1152.14.tar.gz
[apple/cf.git] / CFStringEncodingDatabase.c
/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/* CFStringEncodingDatabase.c
   Copyright (c) 2005-2014, Apple Inc. All rights reserved.
   Responsibility: Aki Inoue
*/
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingExt.h>
31 #include "CFStringEncodingConverterPriv.h"
32 #include "CFStringEncodingDatabase.h"
33 #include <stdio.h>
34
35 #define ISO8859CODEPAGE_BASE (28590)
36
37 static const uint16_t __CFKnownEncodingList[] = {
38 kCFStringEncodingMacRoman,
39 kCFStringEncodingMacJapanese,
40 kCFStringEncodingMacChineseTrad,
41 kCFStringEncodingMacKorean,
42 kCFStringEncodingMacArabic,
43 kCFStringEncodingMacHebrew,
44 kCFStringEncodingMacGreek,
45 kCFStringEncodingMacCyrillic,
46 kCFStringEncodingMacDevanagari,
47 kCFStringEncodingMacGurmukhi,
48 kCFStringEncodingMacGujarati,
49 kCFStringEncodingMacOriya,
50 kCFStringEncodingMacBengali,
51 kCFStringEncodingMacTamil,
52 kCFStringEncodingMacTelugu,
53 kCFStringEncodingMacKannada,
54 kCFStringEncodingMacMalayalam,
55 kCFStringEncodingMacSinhalese,
56 kCFStringEncodingMacBurmese,
57 kCFStringEncodingMacKhmer,
58 kCFStringEncodingMacThai,
59 kCFStringEncodingMacLaotian,
60 kCFStringEncodingMacGeorgian,
61 kCFStringEncodingMacArmenian,
62 kCFStringEncodingMacChineseSimp,
63 kCFStringEncodingMacTibetan,
64 kCFStringEncodingMacMongolian,
65 kCFStringEncodingMacEthiopic,
66 kCFStringEncodingMacCentralEurRoman,
67 kCFStringEncodingMacVietnamese,
68 kCFStringEncodingMacSymbol,
69 kCFStringEncodingMacDingbats,
70 kCFStringEncodingMacTurkish,
71 kCFStringEncodingMacCroatian,
72 kCFStringEncodingMacIcelandic,
73 kCFStringEncodingMacRomanian,
74 kCFStringEncodingMacCeltic,
75 kCFStringEncodingMacGaelic,
76 kCFStringEncodingMacFarsi,
77 kCFStringEncodingMacUkrainian,
78 kCFStringEncodingMacInuit,
79
80 kCFStringEncodingDOSLatinUS,
81 kCFStringEncodingDOSGreek,
82 kCFStringEncodingDOSBalticRim,
83 kCFStringEncodingDOSLatin1,
84 kCFStringEncodingDOSGreek1,
85 kCFStringEncodingDOSLatin2,
86 kCFStringEncodingDOSCyrillic,
87 kCFStringEncodingDOSTurkish,
88 kCFStringEncodingDOSPortuguese,
89 kCFStringEncodingDOSIcelandic,
90 kCFStringEncodingDOSHebrew,
91 kCFStringEncodingDOSCanadianFrench,
92 kCFStringEncodingDOSArabic,
93 kCFStringEncodingDOSNordic,
94 kCFStringEncodingDOSRussian,
95 kCFStringEncodingDOSGreek2,
96 kCFStringEncodingDOSThai,
97 kCFStringEncodingDOSJapanese,
98 kCFStringEncodingDOSChineseSimplif,
99 kCFStringEncodingDOSKorean,
100 kCFStringEncodingDOSChineseTrad,
101
102 kCFStringEncodingWindowsLatin1,
103 kCFStringEncodingWindowsLatin2,
104 kCFStringEncodingWindowsCyrillic,
105 kCFStringEncodingWindowsGreek,
106 kCFStringEncodingWindowsLatin5,
107 kCFStringEncodingWindowsHebrew,
108 kCFStringEncodingWindowsArabic,
109 kCFStringEncodingWindowsBalticRim,
110 kCFStringEncodingWindowsVietnamese,
111 kCFStringEncodingWindowsKoreanJohab,
112 kCFStringEncodingASCII,
113
114 kCFStringEncodingShiftJIS_X0213,
115 kCFStringEncodingGB_18030_2000,
116
117 kCFStringEncodingISO_2022_JP,
118 kCFStringEncodingISO_2022_JP_2,
119 kCFStringEncodingISO_2022_JP_1,
120 kCFStringEncodingISO_2022_JP_3,
121 kCFStringEncodingISO_2022_CN,
122 kCFStringEncodingISO_2022_CN_EXT,
123 kCFStringEncodingISO_2022_KR,
124 kCFStringEncodingEUC_JP,
125 kCFStringEncodingEUC_CN,
126 kCFStringEncodingEUC_TW,
127 kCFStringEncodingEUC_KR,
128
129 kCFStringEncodingShiftJIS,
130
131 kCFStringEncodingKOI8_R,
132
133 kCFStringEncodingBig5,
134
135 kCFStringEncodingMacRomanLatin1,
136 kCFStringEncodingHZ_GB_2312,
137 kCFStringEncodingBig5_HKSCS_1999,
138 kCFStringEncodingVISCII,
139 kCFStringEncodingKOI8_U,
140 kCFStringEncodingBig5_E,
141 kCFStringEncodingUTF7_IMAP,
142
143 kCFStringEncodingNextStepLatin,
144
145 kCFStringEncodingEBCDIC_CP037
146 };
147
// Windows codepage mapping
//
// Parallel to __CFKnownEncodingList: entry i is the Windows code page number
// for the i-th known encoding, or 0 when there is no code page for it.
static const uint16_t __CFWindowsCPList[] = {
    /* Mac OS script encodings (41 entries) */
    10000, 10001, 10002, 10003, 10004, 10005, 10006, 10007,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    10021,
    0, 0, 0,
    10008,
    0, 0, 0,
    10029,
    0, 0, 0,
    10081, 10082, 10079, 10010,
    0, 0, 0,
    10017,
    0,

    /* DOS code pages (21 entries) */
    437, 737, 775, 850, 851, 852, 855, 857, 860, 861, 862,
    863, 864, 865, 866, 869, 874, 932, 936, 949, 950,

    /* Windows code pages (10 entries) */
    1252, 1250, 1251, 1253, 1254, 1255, 1256, 1257, 1258, 1361,

    /* ASCII */
    20127,

    /* Shift JIS X0213, GB 18030 */
    0, 54936,

    /* ISO 2022 family (7 entries) */
    50221, // we prefer this code page since that's what the CF converter generates
           // NOTE(review): the original remark read "over 50220/50221", which
           // contradicts the value chosen; presumably 50220/50222 was meant.
    0, 0, 0,
    50227,
    0,
    50225,

    /* EUC family (4 entries) */
    51932, 51936, 51950, 51949,

    /* Shift JIS */
    0,

    /* KOI8-R */
    20866,

    /* Big5 */
    0,

    /* MacRomanLatin1 ... UTF7-IMAP (7 entries) */
    0, 52936, 0, 0, 21866, 0, 0,

    /* NextStep Latin */
    0,

    /* EBCDIC CP037 */
    37
};
261
// Canonical name
//
// Parallel to __CFKnownEncodingList: entry i is the canonical name for the
// i-th known encoding, or NULL when the name is not table-driven (DOS and
// Windows code pages get "cp%d" / "windows-%d" names built from
// __CFWindowsCPList instead) or no name is available.
// __CFStringEncodingGetCanonicalName() prepends "x-mac-" to the Mac OS script
// names other than "macintosh", and to "roman-latin1".
// The table is never modified, so the pointer elements are const as well.
static const char * const __CFCanonicalNameList[] = {
    /* Mac OS script encodings (41 entries) */
    "macintosh",
    "japanese",
    "trad-chinese",
    "korean",
    "arabic",
    "hebrew",
    "greek",
    "cyrillic",
    "devanagari",
    "gurmukhi",
    "gujarati",
    "oriya",
    "bengali",
    "tamil",
    "telugu",
    "kannada",
    "malayalam",
    "sinhalese",
    "burmese",
    "khmer",
    "thai",
    "laotian",
    "georgian",
    "armenian",
    "simp-chinese",
    "tibetan",
    "mongolian",
    "ethiopic",
    "centraleurroman",
    "vietnamese",
    "symbol",
    "dingbats",
    "turkish",
    "croatian",
    "icelandic",
    "romanian",
    "celtic",
    "gaelic",
    "farsi",
    "ukrainian",
    "inuit",

    /* DOS code pages (21 entries) */
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,

    /* Windows code pages (10 entries) */
    NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL,

    /* ASCII */
    "us-ascii",

    /* Shift JIS X0213, GB 18030 */
    NULL,
    "gb18030",

    /* ISO 2022 and EUC families (11 entries) */
    "iso-2022-jp",
    "iso-2022-jp-2",
    "iso-2022-jp-1",
    "iso-2022-jp-3",
    "iso-2022-cn",
    "iso-2022-cn-ext",
    "iso-2022-kr",
    "euc-jp",
    "gb2312",
    "euc-tw",
    "euc-kr",

    "shift_jis",

    "koi8-r",

    "big5",

    /* MacRomanLatin1 ... UTF7-IMAP (7 entries) */
    "roman-latin1",
    "hz-gb-2312",
    "big5-hkscs",
    "viscii",
    "koi8-u",
    NULL,
    "utf7-imap",

    "x-nextstep",

    "ibm037",
};
374
375 static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) {
376 const uint16_t *head = __CFKnownEncodingList;
377 const uint16_t *tail = head + ((sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList)) - 1);
378 const uint16_t *middle;
379
380 encoding &= 0x0FFF;
381 while (head <= tail) {
382 middle = head + ((tail - head) >> 1);
383
384 if (encoding == *middle) {
385 return middle - __CFKnownEncodingList;
386 } else if (encoding < *middle) {
387 tail = middle - 1;
388 } else {
389 head = middle + 1;
390 }
391 }
392
393 return kCFNotFound;
394 }
395
396 CF_PRIVATE uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) {
397 CFStringEncoding encodingBase = encoding & 0x0F00;
398
399 if (0x0100 == encodingBase) { // UTF
400 switch (encoding) {
401 case kCFStringEncodingUTF7: return 65000;
402 case kCFStringEncodingUTF8: return 65001;
403 case kCFStringEncodingUTF16: return 1200;
404 case kCFStringEncodingUTF16BE: return 1201;
405 case kCFStringEncodingUTF32: return 65005;
406 case kCFStringEncodingUTF32BE: return 65006;
407 }
408 } else if (0x0200 == encodingBase) { // ISO 8859 range
409 return ISO8859CODEPAGE_BASE + (encoding & 0xFF);
410 } else { // others
411 CFIndex index = __CFGetEncodingIndex(encoding);
412
413 if (kCFNotFound != index) return __CFWindowsCPList[index];
414 }
415
416 return 0;
417 }
418
419 CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) {
420 switch (codepage) {
421 case 65001: return kCFStringEncodingUTF8;
422 case 1200: return kCFStringEncodingUTF16;
423 case 0: return kCFStringEncodingInvalidId;
424 case 1201: return kCFStringEncodingUTF16BE;
425 case 65005: return kCFStringEncodingUTF32;
426 case 65006: return kCFStringEncodingUTF32BE;
427 case 65000: return kCFStringEncodingUTF7;
428 }
429
430 if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) {
431 return (codepage - ISO8859CODEPAGE_BASE) + 0x0200;
432 } else {
433 static CFMutableDictionaryRef mappingTable = NULL;
434 static CFLock_t lock = CFLockInit;
435 uintptr_t value;
436
437 __CFLock(&lock);
438 if (NULL == mappingTable) {
439 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
440
441 mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
442
443 for (index = 0;index < count;index++) {
444 if (0 != __CFWindowsCPList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFWindowsCPList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
445 }
446 }
447 __CFUnlock(&lock);
448
449 if (CFDictionaryGetValueIfPresent(mappingTable, (const void *)(uintptr_t)codepage, (const void **)&value)) return (CFStringEncoding)value;
450 }
451
452
453 return kCFStringEncodingInvalidId;
454 }
455
456 CF_PRIVATE bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) {
457 const char *format = "%s";
458 const char *name = NULL;
459 uint32_t value = 0;
460 CFIndex index;
461
462 switch (encoding & 0x0F00) {
463 case 0x0100: // UTF range
464 switch (encoding) {
465 case kCFStringEncodingUTF7: name = "utf-7"; break;
466 case kCFStringEncodingUTF8: name = "utf-8"; break;
467 case kCFStringEncodingUTF16: name = "utf-16"; break;
468 case kCFStringEncodingUTF16BE: name = "utf-16be"; break;
469 case kCFStringEncodingUTF16LE: name = "utf-16le"; break;
470 case kCFStringEncodingUTF32: name = "utf-32"; break;
471 case kCFStringEncodingUTF32BE: name = "utf-32be"; break;
472 case kCFStringEncodingUTF32LE: name = "utf-32le"; break;
473 }
474 break;
475
476 case 0x0200: // ISO 8859 range
477 format = "iso-8859-%d";
478 value = (encoding & 0xFF);
479 break;
480
481 case 0x0400: // DOS code page range
482 case 0x0500: // Windows code page range
483 index = __CFGetEncodingIndex(encoding);
484
485 if (kCFNotFound != index) {
486 value = __CFWindowsCPList[index];
487 if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d");
488 }
489 break;
490
491 default: // others
492 index = __CFGetEncodingIndex(encoding);
493
494 if (kCFNotFound != index) {
495 if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s";
496 name = (const char *)__CFCanonicalNameList[index];
497 }
498 break;
499 }
500
501 if ((0 == value) && (NULL == name)) {
502 return false;
503 } else if (0 != value) {
504 return ((snprintf(buffer, bufferSize, format, value) < bufferSize) ? true : false);
505 } else {
506 return ((snprintf(buffer, bufferSize, format, name) < bufferSize) ? true : false);
507 }
508 }
509
510 #define LENGTH_LIMIT (256)
511 static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) { return ((0 == strncasecmp_l((const char *)value1, (const char *)value2, LENGTH_LIMIT, NULL)) ? true : false); }
512
513 static CFHashCode __CFCanonicalNameHash(const void *value) {
514 const char *name = (const char *)value;
515 CFHashCode code = 0;
516
517 while ((0 != *name) && ((name - (const char *)value) < LENGTH_LIMIT)) {
518 char character = *(name++);
519
520 code += (character + (((character >= 'A') && (character <= 'Z')) ? 'a' - 'A' : 0));
521 }
522
523 return code * (name - (const char *)value);
524 }
525
526 CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) {
527 CFStringEncoding encoding;
528 CFIndex prefixLength;
529 static CFMutableDictionaryRef mappingTable = NULL;
530 static CFLock_t lock = CFLockInit;
531
532 prefixLength = strlen("iso-8859-");
533 if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO
534 encoding = strtol(canonicalName + prefixLength, NULL, 10);
535
536 return (((0 == encoding) || (encoding > 16)) ? kCFStringEncodingInvalidId : encoding + 0x0200);
537 }
538
539 prefixLength = strlen("cp");
540 if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS
541 encoding = strtol(canonicalName + prefixLength, NULL, 10);
542
543 return __CFStringEncodingGetFromWindowsCodePage(encoding);
544 }
545
546 prefixLength = strlen("windows-");
547 if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do DOS
548 encoding = strtol(canonicalName + prefixLength, NULL, 10);
549
550 return __CFStringEncodingGetFromWindowsCodePage(encoding);
551 }
552
553 __CFLock(&lock);
554 if (NULL == mappingTable) {
555 CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
556
557 CFDictionaryKeyCallBacks keys = {
558 0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash
559 };
560
561 mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL);
562
563 // Add UTFs
564 CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7);
565 CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8);
566 CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16);
567 CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE);
568 CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE);
569 CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32);
570 CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE);
571 CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE);
572
573 for (index = 0;index < count;index++) {
574 if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
575 }
576 }
577 __CFUnlock(&lock);
578
579 if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman;
580
581
582 prefixLength = strlen("x-mac-");
583 encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0));
584
585 return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding);
586 }
587 #undef LENGTH_LIMIT
588
589 #if DEPLOYMENT_TARGET_MACOSX
590 // This list indexes from DOS range
591 static uint16_t __CFISO8859SimilarScriptList[] = {
592 kCFStringEncodingMacRoman,
593 kCFStringEncodingMacCentralEurRoman,
594 kCFStringEncodingMacRoman,
595 kCFStringEncodingMacCentralEurRoman,
596 kCFStringEncodingMacCyrillic,
597 kCFStringEncodingMacArabic,
598 kCFStringEncodingMacGreek,
599 kCFStringEncodingMacHebrew,
600 kCFStringEncodingMacTurkish,
601 kCFStringEncodingMacInuit,
602 kCFStringEncodingMacThai,
603 kCFStringEncodingMacRoman,
604 kCFStringEncodingMacCentralEurRoman,
605 kCFStringEncodingMacCeltic,
606 kCFStringEncodingMacRoman,
607 kCFStringEncodingMacRomanian};
608
609 static uint16_t __CFOtherSimilarScriptList[] = {
610 kCFStringEncodingMacRoman,
611 kCFStringEncodingMacGreek,
612 kCFStringEncodingMacCentralEurRoman,
613 kCFStringEncodingMacRoman,
614 kCFStringEncodingMacGreek,
615 kCFStringEncodingMacCentralEurRoman,
616 kCFStringEncodingMacCyrillic,
617 kCFStringEncodingMacTurkish,
618 kCFStringEncodingMacRoman,
619 kCFStringEncodingMacIcelandic,
620 kCFStringEncodingMacHebrew,
621 kCFStringEncodingMacRoman,
622 kCFStringEncodingMacArabic,
623 kCFStringEncodingMacInuit,
624 kCFStringEncodingMacCyrillic,
625 kCFStringEncodingMacGreek,
626 kCFStringEncodingMacThai,
627 kCFStringEncodingMacJapanese,
628 kCFStringEncodingMacChineseSimp,
629 kCFStringEncodingMacKorean,
630 kCFStringEncodingMacChineseTrad,
631
632 kCFStringEncodingMacRoman,
633 kCFStringEncodingMacCentralEurRoman,
634 kCFStringEncodingMacCyrillic,
635 kCFStringEncodingMacGreek,
636 kCFStringEncodingMacTurkish,
637 kCFStringEncodingMacHebrew,
638 kCFStringEncodingMacArabic,
639 kCFStringEncodingMacCentralEurRoman,
640 kCFStringEncodingMacVietnamese,
641 kCFStringEncodingMacKorean,
642
643 kCFStringEncodingMacRoman,
644
645 kCFStringEncodingMacJapanese,
646 kCFStringEncodingMacChineseSimp,
647
648 kCFStringEncodingMacJapanese,
649 kCFStringEncodingMacJapanese,
650 kCFStringEncodingMacJapanese,
651 kCFStringEncodingMacJapanese,
652 kCFStringEncodingMacChineseSimp,
653 kCFStringEncodingMacChineseSimp,
654 kCFStringEncodingMacKorean,
655 kCFStringEncodingMacJapanese,
656 kCFStringEncodingMacChineseSimp,
657 kCFStringEncodingMacChineseTrad,
658 kCFStringEncodingMacKorean,
659
660 kCFStringEncodingMacJapanese,
661
662 kCFStringEncodingMacCyrillic,
663
664 kCFStringEncodingMacChineseTrad,
665
666 kCFStringEncodingMacRoman,
667 kCFStringEncodingMacChineseSimp,
668 kCFStringEncodingMacChineseTrad,
669 kCFStringEncodingMacVietnamese,
670 kCFStringEncodingMacUkrainian,
671 kCFStringEncodingMacChineseTrad,
672 kCFStringEncodingMacRoman,
673
674 kCFStringEncodingMacRoman,
675
676 kCFStringEncodingMacRoman
677 };
678
// Display names for the ISO 8859 encodings, indexed by (part number - 1).
// A NULL entry means no display name is provided for that part.
// The table is never modified, so the pointer elements are const as well.
static const char * const __CFISONameList[] = {
    "Western (ISO Latin 1)",
    "Central European (ISO Latin 2)",
    "Western (ISO Latin 3)",
    "Central European (ISO Latin 4)",
    "Cyrillic (ISO 8859-5)",
    "Arabic (ISO 8859-6)",
    "Greek (ISO 8859-7)",
    "Hebrew (ISO 8859-8)",
    "Turkish (ISO Latin 5)",
    "Nordic (ISO Latin 6)",
    "Thai (ISO 8859-11)",
    NULL,
    "Baltic (ISO Latin 7)",
    "Celtic (ISO Latin 8)",
    "Western (ISO Latin 9)",
    "Romanian (ISO Latin 10)",
};
697
// Display names for the table-driven encodings. Parallel to
// __CFKnownEncodingList: entry i names the i-th known encoding, and a NULL
// entry means no display name is provided for it.
// The table is never modified, so the pointer elements are const as well.
static const char * const __CFOtherNameList[] = {
    /* Mac OS script encodings */
    "Western (Mac OS Roman)",
    "Japanese (Mac OS)",
    "Traditional Chinese (Mac OS)",
    "Korean (Mac OS)",
    "Arabic (Mac OS)",
    "Hebrew (Mac OS)",
    "Greek (Mac OS)",
    "Cyrillic (Mac OS)",
    "Devanagari (Mac OS)",
    "Gurmukhi (Mac OS)",
    "Gujarati (Mac OS)",
    "Oriya (Mac OS)",
    "Bengali (Mac OS)",
    "Tamil (Mac OS)",
    "Telugu (Mac OS)",
    "Kannada (Mac OS)",
    "Malayalam (Mac OS)",
    "Sinhalese (Mac OS)",
    "Burmese (Mac OS)",
    "Khmer (Mac OS)",
    "Thai (Mac OS)",
    "Laotian (Mac OS)",
    "Georgian (Mac OS)",
    "Armenian (Mac OS)",
    "Simplified Chinese (Mac OS)",
    "Tibetan (Mac OS)",
    "Mongolian (Mac OS)",
    "Ethiopic (Mac OS)",
    "Central European (Mac OS)",
    "Vietnamese (Mac OS)",
    "Symbol (Mac OS)",
    "Dingbats (Mac OS)",
    "Turkish (Mac OS)",
    "Croatian (Mac OS)",
    "Icelandic (Mac OS)",
    "Romanian (Mac OS)",
    "Celtic (Mac OS)",
    "Gaelic (Mac OS)",
    "Farsi (Mac OS)",
    "Cyrillic (Mac OS Ukrainian)",
    "Inuit (Mac OS)",

    /* DOS code pages */
    "Latin-US (DOS)",
    "Greek (DOS)",
    "Baltic (DOS)",
    "Western (DOS Latin 1)",
    "Greek (DOS Greek 1)",
    "Central European (DOS Latin 2)",
    "Cyrillic (DOS)",
    "Turkish (DOS)",
    "Portuguese (DOS)",
    "Icelandic (DOS)",
    "Hebrew (DOS)",
    "Canadian French (DOS)",
    "Arabic (DOS)",
    "Nordic (DOS)",
    "Russian (DOS)",
    "Greek (DOS Greek 2)",
    "Thai (Windows, DOS)",
    "Japanese (Windows, DOS)",
    "Simplified Chinese (Windows, DOS)",
    "Korean (Windows, DOS)",
    "Traditional Chinese (Windows, DOS)",

    /* Windows code pages, then ASCII */
    "Western (Windows Latin 1)",
    "Central European (Windows Latin 2)",
    "Cyrillic (Windows)",
    "Greek (Windows)",
    "Turkish (Windows Latin 5)",
    "Hebrew (Windows)",
    "Arabic (Windows)",
    "Baltic (Windows)",
    "Vietnamese (Windows)",
    "Korean (Windows Johab)",
    "Western (ASCII)",

    "Japanese (Shift JIS X0213)",
    "Chinese (GB 18030)",

    /* ISO 2022 and EUC families */
    "Japanese (ISO 2022-JP)",
    "Japanese (ISO 2022-JP-2)",
    "Japanese (ISO 2022-JP-1)",
    "Japanese (ISO 2022-JP-3)",
    "Chinese (ISO 2022-CN)",
    "Chinese (ISO 2022-CN-EXT)",
    "Korean (ISO 2022-KR)",
    "Japanese (EUC)",
    "Simplified Chinese (GB 2312)",
    "Traditional Chinese (EUC)",
    "Korean (EUC)",

    /* Miscellaneous standards */
    "Japanese (Shift JIS)",
    "Cyrillic (KOI8-R)",
    "Traditional Chinese (Big 5)",
    "Western (Mac Mail)",
    "Simplified Chinese (HZ GB 2312)",
    "Traditional Chinese (Big 5 HKSCS)",
    NULL,
    "Ukrainian (KOI8-U)",
    "Traditional Chinese (Big 5-E)",
    NULL,
    "Western (NextStep)",
    "Western (EBCDIC Latin 1)",
};
798 #endif /* DEPLOYMENT_TARGET_MACOSX */
799
800 CF_PRIVATE CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) {
801 #if DEPLOYMENT_TARGET_MACOSX
802 switch (encoding & 0x0F00) {
803 case 0: return encoding & 0xFF; break; // Mac scripts
804
805 case 0x0100: return kCFStringEncodingUnicode; break; // Unicode
806
807 case 0x200: // ISO 8859
808 return (((encoding & 0xFF) <= (sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList))) ? __CFISO8859SimilarScriptList[(encoding & 0xFF) - 1] : kCFStringEncodingInvalidId);
809 break;
810
811 default: {
812 CFIndex index = __CFGetEncodingIndex(encoding);
813
814 if (kCFNotFound != index) {
815 index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS);
816 return __CFOtherSimilarScriptList[index];
817 }
818 }
819 }
820 #endif /* DEPLOYMENT_TARGET_MACOSX */
821
822 return kCFStringEncodingInvalidId;
823 }
824
825 CF_PRIVATE const char *__CFStringEncodingGetName(CFStringEncoding encoding) {
826 switch (encoding) {
827 case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break;
828 case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break;
829 case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break;
830 case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break;
831 case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break;
832 case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break;
833 case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break;
834 case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break;
835 case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break;
836 }
837
838 #if DEPLOYMENT_TARGET_MACOSX
839 if (0x0200 == (encoding & 0x0F00)) {
840 encoding &= 0x00FF;
841
842 if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1];
843 } else {
844 CFIndex index = __CFGetEncodingIndex(encoding);
845
846 if (kCFNotFound != index) return __CFOtherNameList[index];
847 }
848 #endif /* DEPLOYMENT_TARGET_MACOSX */
849
850 return NULL;
851 }