]>
Commit | Line | Data |
---|---|---|
cf7d2af9 | 1 | /* |
8ca704e1 | 2 | * Copyright (c) 2011 Apple Inc. All rights reserved. |
cf7d2af9 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
f64f9b69 | 23 | |
8ca704e1 A |
24 | /* CFStringEncodingDatabase.c |
25 | Copyright (c) 2005-2011, Apple Inc. All rights reserved. | |
26 | Responsibility: Aki Inoue | |
27 | */ | |
cf7d2af9 A |
28 | |
29 | #include "CFInternal.h" | |
30 | #include <CoreFoundation/CFStringEncodingExt.h> | |
31 | #include "CFStringEncodingConverterPriv.h" | |
32 | #include "CFStringEncodingDatabase.h" | |
33 | #include <stdio.h> | |
34 | ||
cf7d2af9 A |
// Base of the Windows code page numbers used for ISO 8859: part n maps to
// ISO8859CODEPAGE_BASE + n.
#define ISO8859CODEPAGE_BASE (28590)
36 | ||
37 | static const uint16_t __CFKnownEncodingList[] = { | |
38 | kCFStringEncodingMacRoman, | |
39 | kCFStringEncodingMacJapanese, | |
40 | kCFStringEncodingMacChineseTrad, | |
41 | kCFStringEncodingMacKorean, | |
42 | kCFStringEncodingMacArabic, | |
43 | kCFStringEncodingMacHebrew, | |
44 | kCFStringEncodingMacGreek, | |
45 | kCFStringEncodingMacCyrillic, | |
46 | kCFStringEncodingMacDevanagari, | |
47 | kCFStringEncodingMacGurmukhi, | |
48 | kCFStringEncodingMacGujarati, | |
49 | kCFStringEncodingMacOriya, | |
50 | kCFStringEncodingMacBengali, | |
51 | kCFStringEncodingMacTamil, | |
52 | kCFStringEncodingMacTelugu, | |
53 | kCFStringEncodingMacKannada, | |
54 | kCFStringEncodingMacMalayalam, | |
55 | kCFStringEncodingMacSinhalese, | |
56 | kCFStringEncodingMacBurmese, | |
57 | kCFStringEncodingMacKhmer, | |
58 | kCFStringEncodingMacThai, | |
59 | kCFStringEncodingMacLaotian, | |
60 | kCFStringEncodingMacGeorgian, | |
61 | kCFStringEncodingMacArmenian, | |
62 | kCFStringEncodingMacChineseSimp, | |
63 | kCFStringEncodingMacTibetan, | |
64 | kCFStringEncodingMacMongolian, | |
65 | kCFStringEncodingMacEthiopic, | |
66 | kCFStringEncodingMacCentralEurRoman, | |
67 | kCFStringEncodingMacVietnamese, | |
68 | kCFStringEncodingMacSymbol, | |
69 | kCFStringEncodingMacDingbats, | |
70 | kCFStringEncodingMacTurkish, | |
71 | kCFStringEncodingMacCroatian, | |
72 | kCFStringEncodingMacIcelandic, | |
73 | kCFStringEncodingMacRomanian, | |
74 | kCFStringEncodingMacCeltic, | |
75 | kCFStringEncodingMacGaelic, | |
76 | kCFStringEncodingMacFarsi, | |
77 | kCFStringEncodingMacUkrainian, | |
78 | kCFStringEncodingMacInuit, | |
79 | ||
80 | kCFStringEncodingDOSLatinUS, | |
81 | kCFStringEncodingDOSGreek, | |
82 | kCFStringEncodingDOSBalticRim, | |
83 | kCFStringEncodingDOSLatin1, | |
84 | kCFStringEncodingDOSGreek1, | |
85 | kCFStringEncodingDOSLatin2, | |
86 | kCFStringEncodingDOSCyrillic, | |
87 | kCFStringEncodingDOSTurkish, | |
88 | kCFStringEncodingDOSPortuguese, | |
89 | kCFStringEncodingDOSIcelandic, | |
90 | kCFStringEncodingDOSHebrew, | |
91 | kCFStringEncodingDOSCanadianFrench, | |
92 | kCFStringEncodingDOSArabic, | |
93 | kCFStringEncodingDOSNordic, | |
94 | kCFStringEncodingDOSRussian, | |
95 | kCFStringEncodingDOSGreek2, | |
96 | kCFStringEncodingDOSThai, | |
97 | kCFStringEncodingDOSJapanese, | |
98 | kCFStringEncodingDOSChineseSimplif, | |
99 | kCFStringEncodingDOSKorean, | |
100 | kCFStringEncodingDOSChineseTrad, | |
101 | ||
102 | kCFStringEncodingWindowsLatin1, | |
103 | kCFStringEncodingWindowsLatin2, | |
104 | kCFStringEncodingWindowsCyrillic, | |
105 | kCFStringEncodingWindowsGreek, | |
106 | kCFStringEncodingWindowsLatin5, | |
107 | kCFStringEncodingWindowsHebrew, | |
108 | kCFStringEncodingWindowsArabic, | |
109 | kCFStringEncodingWindowsBalticRim, | |
110 | kCFStringEncodingWindowsVietnamese, | |
111 | kCFStringEncodingWindowsKoreanJohab, | |
112 | kCFStringEncodingASCII, | |
113 | ||
114 | kCFStringEncodingShiftJIS_X0213, | |
115 | kCFStringEncodingGB_18030_2000, | |
116 | ||
117 | kCFStringEncodingISO_2022_JP, | |
118 | kCFStringEncodingISO_2022_JP_2, | |
119 | kCFStringEncodingISO_2022_JP_1, | |
120 | kCFStringEncodingISO_2022_JP_3, | |
121 | kCFStringEncodingISO_2022_CN, | |
122 | kCFStringEncodingISO_2022_CN_EXT, | |
123 | kCFStringEncodingISO_2022_KR, | |
124 | kCFStringEncodingEUC_JP, | |
125 | kCFStringEncodingEUC_CN, | |
126 | kCFStringEncodingEUC_TW, | |
127 | kCFStringEncodingEUC_KR, | |
128 | ||
129 | kCFStringEncodingShiftJIS, | |
130 | ||
131 | kCFStringEncodingKOI8_R, | |
132 | ||
133 | kCFStringEncodingBig5, | |
134 | ||
135 | kCFStringEncodingMacRomanLatin1, | |
136 | kCFStringEncodingHZ_GB_2312, | |
137 | kCFStringEncodingBig5_HKSCS_1999, | |
138 | kCFStringEncodingVISCII, | |
139 | kCFStringEncodingKOI8_U, | |
140 | kCFStringEncodingBig5_E, | |
141 | kCFStringEncodingUTF7_IMAP, | |
142 | ||
143 | kCFStringEncodingNextStepLatin, | |
144 | ||
145 | kCFStringEncodingEBCDIC_CP037 | |
146 | }; | |
147 | ||
// Windows code page for each entry of __CFKnownEncodingList (same index).
// A value of 0 means no code page is recorded for that encoding.
static const uint16_t __CFWindowsCPList[] = {
    // kCFStringEncodingMac* entries
    10000, 10001, 10002, 10003, 10004, 10005, 10006, 10007,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    10021,
    0, 0, 0,
    10008,
    0, 0, 0,
    10029,
    0, 0, 0,
    10081, 10082, 10079, 10010,
    0, 0, 0,
    10017,
    0,

    // kCFStringEncodingDOS* entries
    437, 737, 775, 850, 851, 852, 855, 857, 860, 861, 862,
    863, 864, 865, 866, 869, 874, 932, 936, 949, 950,

    // kCFStringEncodingWindows* entries
    1252, 1250, 1251, 1253, 1254, 1255, 1256, 1257, 1258, 1361,

    // kCFStringEncodingASCII
    20127,

    // ShiftJIS_X0213, GB_18030_2000
    0, 54936,

    // ISO 2022 family
    50221, // we prefer this over 50220/50222 since that's what the CF converter generates
    0, 0, 0,
    50227,
    0,
    50225,

    // EUC family
    51932, 51936, 51950, 51949,

    // ShiftJIS
    0,

    // KOI8_R
    20866,

    // Big5
    0,

    // MacRomanLatin1 .. UTF7_IMAP
    0, 52936, 0, 0, 21866, 0, 0,

    // NextStepLatin
    0,

    // EBCDIC_CP037
    37
};
261 | ||
// Canonical (charset-style) name for each entry of __CFKnownEncodingList
// (same index). NULL means the name is not stored here; for the DOS and
// Windows entries __CFStringEncodingGetCanonicalName() builds the name from
// the code page number instead.
static const char *__CFCanonicalNameList[] = {
    // kCFStringEncodingMac* entries
    "macintosh", "japanese", "trad-chinese", "korean",
    "arabic", "hebrew", "greek", "cyrillic",
    "devanagari", "gurmukhi", "gujarati", "oriya",
    "bengali", "tamil", "telugu", "kannada",
    "malayalam", "sinhalese", "burmese", "khmer",
    "thai", "laotian", "georgian", "armenian",
    "simp-chinese", "tibetan", "mongolian", "ethiopic",
    "centraleurroman", "vietnamese", "symbol", "dingbats",
    "turkish", "croatian", "icelandic", "romanian",
    "celtic", "gaelic", "farsi", "ukrainian",
    "inuit",

    // kCFStringEncodingDOS* entries (21)
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL,

    // kCFStringEncodingWindows* entries (10)
    NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL,

    "us-ascii",

    NULL,
    "gb18030",

    "iso-2022-jp", "iso-2022-jp-2", "iso-2022-jp-1", "iso-2022-jp-3",
    "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-kr",
    "euc-jp", "gb2312", "euc-tw", "euc-kr",

    "shift_jis",

    "koi8-r",

    "big5",

    "roman-latin1", "hz-gb-2312", "big5-hkscs", "viscii", "koi8-u",
    NULL,
    "utf7-imap",

    "x-nextstep",

    "ibm037",
};
374 | ||
375 | static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) { | |
376 | const uint16_t *head = __CFKnownEncodingList; | |
377 | const uint16_t *tail = head + ((sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList)) - 1); | |
378 | const uint16_t *middle; | |
379 | ||
380 | encoding &= 0x0FFF; | |
381 | while (head <= tail) { | |
382 | middle = head + ((tail - head) >> 1); | |
383 | ||
384 | if (encoding == *middle) { | |
385 | return middle - __CFKnownEncodingList; | |
386 | } else if (encoding < *middle) { | |
387 | tail = middle - 1; | |
388 | } else { | |
389 | head = middle + 1; | |
390 | } | |
391 | } | |
392 | ||
393 | return kCFNotFound; | |
394 | } | |
395 | ||
396 | __private_extern__ uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) { | |
397 | CFStringEncoding encodingBase = encoding & 0x0F00; | |
398 | ||
399 | if (0x0100 == encodingBase) { // UTF | |
400 | switch (encoding) { | |
401 | case kCFStringEncodingUTF7: return 65000; | |
402 | case kCFStringEncodingUTF8: return 65001; | |
403 | case kCFStringEncodingUTF16: return 1200; | |
404 | case kCFStringEncodingUTF16BE: return 1201; | |
405 | case kCFStringEncodingUTF32: return 65005; | |
406 | case kCFStringEncodingUTF32BE: return 65006; | |
407 | } | |
408 | } else if (0x0200 == encodingBase) { // ISO 8859 range | |
409 | return ISO8859CODEPAGE_BASE + (encoding & 0xFF); | |
410 | } else { // others | |
411 | CFIndex index = __CFGetEncodingIndex(encoding); | |
412 | ||
413 | if (kCFNotFound != index) return __CFWindowsCPList[index]; | |
414 | } | |
415 | ||
416 | return 0; | |
417 | } | |
418 | ||
419 | __private_extern__ CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) { | |
420 | switch (codepage) { | |
421 | case 65001: return kCFStringEncodingUTF8; | |
422 | case 1200: return kCFStringEncodingUTF16; | |
423 | case 0: return kCFStringEncodingInvalidId; | |
424 | case 1201: return kCFStringEncodingUTF16BE; | |
425 | case 65005: return kCFStringEncodingUTF32; | |
426 | case 65006: return kCFStringEncodingUTF32BE; | |
427 | case 65000: return kCFStringEncodingUTF7; | |
428 | } | |
429 | ||
430 | if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) { | |
431 | return (codepage - ISO8859CODEPAGE_BASE) + 0x0200; | |
432 | } else { | |
433 | static CFMutableDictionaryRef mappingTable = NULL; | |
434 | static CFSpinLock_t lock = CFSpinLockInit; | |
435 | uintptr_t value; | |
436 | ||
437 | __CFSpinLock(&lock); | |
438 | if (NULL == mappingTable) { | |
439 | CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList); | |
440 | ||
441 | mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL); | |
442 | ||
443 | for (index = 0;index < count;index++) { | |
444 | if (0 != __CFWindowsCPList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFWindowsCPList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]); | |
445 | } | |
446 | } | |
447 | __CFSpinUnlock(&lock); | |
448 | ||
449 | if (CFDictionaryGetValueIfPresent(mappingTable, (const void *)(uintptr_t)codepage, (const void **)&value)) return (CFStringEncoding)value; | |
450 | } | |
451 | ||
452 | ||
453 | return kCFStringEncodingInvalidId; | |
454 | } | |
455 | ||
456 | __private_extern__ bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) { | |
457 | const char *format = "%s"; | |
458 | const char *name = NULL; | |
459 | uint32_t value = 0; | |
460 | CFIndex index; | |
461 | ||
462 | switch (encoding & 0x0F00) { | |
463 | case 0x0100: // UTF range | |
464 | switch (encoding) { | |
465 | case kCFStringEncodingUTF7: name = "utf-7"; break; | |
466 | case kCFStringEncodingUTF8: name = "utf-8"; break; | |
467 | case kCFStringEncodingUTF16: name = "utf-16"; break; | |
468 | case kCFStringEncodingUTF16BE: name = "utf-16be"; break; | |
469 | case kCFStringEncodingUTF16LE: name = "utf-16le"; break; | |
470 | case kCFStringEncodingUTF32: name = "utf-32"; break; | |
471 | case kCFStringEncodingUTF32BE: name = "utf-32be"; break; | |
472 | case kCFStringEncodingUTF32LE: name = "utf-32le"; break; | |
473 | } | |
474 | break; | |
475 | ||
476 | case 0x0200: // ISO 8859 range | |
477 | format = "iso-8859-%d"; | |
478 | value = (encoding & 0xFF); | |
479 | break; | |
480 | ||
481 | case 0x0400: // DOS code page range | |
482 | case 0x0500: // Windows code page range | |
483 | index = __CFGetEncodingIndex(encoding); | |
484 | ||
485 | if (kCFNotFound != index) { | |
486 | value = __CFWindowsCPList[index]; | |
487 | if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d"); | |
488 | } | |
489 | break; | |
490 | ||
491 | default: // others | |
492 | index = __CFGetEncodingIndex(encoding); | |
493 | ||
494 | if (kCFNotFound != index) { | |
495 | if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s"; | |
496 | name = (const char *)__CFCanonicalNameList[index]; | |
497 | } | |
498 | break; | |
499 | } | |
500 | ||
501 | if ((0 == value) && (NULL == name)) { | |
502 | return false; | |
503 | } else if (0 != value) { | |
504 | return ((snprintf(buffer, bufferSize, format, value) < bufferSize) ? true : false); | |
505 | } else { | |
506 | return ((snprintf(buffer, bufferSize, format, name) < bufferSize) ? true : false); | |
507 | } | |
508 | } | |
509 | ||
510 | #define LENGTH_LIMIT (256) | |
511 | static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) { return ((0 == strncasecmp_l((const char *)value1, (const char *)value2, LENGTH_LIMIT, NULL)) ? true : false); } | |
512 | ||
513 | static CFHashCode __CFCanonicalNameHash(const void *value) { | |
514 | const char *name = (const char *)value; | |
515 | CFHashCode code = 0; | |
516 | ||
517 | while ((0 != *name) && ((name - (const char *)value) < LENGTH_LIMIT)) { | |
518 | char character = *(name++); | |
519 | ||
520 | code += (character + (((character >= 'A') && (character <= 'Z')) ? 'a' - 'A' : 0)); | |
521 | } | |
522 | ||
523 | return code * (name - (const char *)value); | |
524 | } | |
525 | ||
526 | __private_extern__ CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) { | |
527 | CFStringEncoding encoding; | |
528 | CFIndex prefixLength; | |
529 | static CFMutableDictionaryRef mappingTable = NULL; | |
530 | static CFSpinLock_t lock = CFSpinLockInit; | |
531 | ||
532 | prefixLength = strlen("iso-8859-"); | |
533 | if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO | |
534 | encoding = strtol(canonicalName + prefixLength, NULL, 10); | |
535 | ||
536 | return (((0 == encoding) || (encoding > 16)) ? kCFStringEncodingInvalidId : encoding + 0x0200); | |
537 | } | |
538 | ||
539 | prefixLength = strlen("cp"); | |
540 | if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS | |
541 | encoding = strtol(canonicalName + prefixLength, NULL, 10); | |
542 | ||
543 | return __CFStringEncodingGetFromWindowsCodePage(encoding); | |
544 | } | |
545 | ||
546 | prefixLength = strlen("windows-"); | |
547 | if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do DOS | |
548 | encoding = strtol(canonicalName + prefixLength, NULL, 10); | |
549 | ||
550 | return __CFStringEncodingGetFromWindowsCodePage(encoding); | |
551 | } | |
552 | ||
553 | __CFSpinLock(&lock); | |
554 | if (NULL == mappingTable) { | |
555 | CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList); | |
556 | ||
557 | CFDictionaryKeyCallBacks keys = { | |
558 | 0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash | |
559 | }; | |
560 | ||
561 | mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL); | |
562 | ||
563 | // Add UTFs | |
564 | CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7); | |
565 | CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8); | |
566 | CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16); | |
567 | CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE); | |
568 | CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE); | |
569 | CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32); | |
570 | CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE); | |
571 | CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE); | |
572 | ||
573 | for (index = 0;index < count;index++) { | |
574 | if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]); | |
575 | } | |
576 | } | |
577 | __CFSpinUnlock(&lock); | |
578 | ||
579 | if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman; | |
580 | ||
581 | ||
582 | prefixLength = strlen("x-mac-"); | |
583 | encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0)); | |
584 | ||
585 | return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding); | |
586 | } | |
587 | #undef LENGTH_LIMIT | |
588 | ||
589 | #if DEPLOYMENT_TARGET_MACOSX | |
590 | // This list indexes from DOS range | |
591 | static uint16_t __CFISO8859SimilarScriptList[] = { | |
592 | kCFStringEncodingMacRoman, | |
593 | kCFStringEncodingMacCentralEurRoman, | |
594 | kCFStringEncodingMacRoman, | |
595 | kCFStringEncodingMacCentralEurRoman, | |
596 | kCFStringEncodingMacCyrillic, | |
597 | kCFStringEncodingMacArabic, | |
598 | kCFStringEncodingMacGreek, | |
599 | kCFStringEncodingMacHebrew, | |
600 | kCFStringEncodingMacTurkish, | |
601 | kCFStringEncodingMacInuit, | |
602 | kCFStringEncodingMacThai, | |
603 | kCFStringEncodingMacRoman, | |
604 | kCFStringEncodingMacCentralEurRoman, | |
605 | kCFStringEncodingMacCeltic, | |
606 | kCFStringEncodingMacRoman, | |
607 | kCFStringEncodingMacRomanian}; | |
608 | ||
609 | static uint16_t __CFOtherSimilarScriptList[] = { | |
610 | kCFStringEncodingMacRoman, | |
611 | kCFStringEncodingMacGreek, | |
612 | kCFStringEncodingMacCentralEurRoman, | |
613 | kCFStringEncodingMacRoman, | |
614 | kCFStringEncodingMacGreek, | |
615 | kCFStringEncodingMacCentralEurRoman, | |
616 | kCFStringEncodingMacCyrillic, | |
617 | kCFStringEncodingMacTurkish, | |
618 | kCFStringEncodingMacRoman, | |
619 | kCFStringEncodingMacIcelandic, | |
620 | kCFStringEncodingMacHebrew, | |
621 | kCFStringEncodingMacRoman, | |
622 | kCFStringEncodingMacArabic, | |
623 | kCFStringEncodingMacInuit, | |
624 | kCFStringEncodingMacCyrillic, | |
625 | kCFStringEncodingMacGreek, | |
626 | kCFStringEncodingMacThai, | |
627 | kCFStringEncodingMacJapanese, | |
628 | kCFStringEncodingMacChineseSimp, | |
629 | kCFStringEncodingMacKorean, | |
630 | kCFStringEncodingMacChineseTrad, | |
631 | ||
632 | kCFStringEncodingMacRoman, | |
633 | kCFStringEncodingMacCentralEurRoman, | |
634 | kCFStringEncodingMacCyrillic, | |
635 | kCFStringEncodingMacGreek, | |
636 | kCFStringEncodingMacTurkish, | |
637 | kCFStringEncodingMacHebrew, | |
638 | kCFStringEncodingMacArabic, | |
639 | kCFStringEncodingMacCentralEurRoman, | |
640 | kCFStringEncodingMacVietnamese, | |
641 | kCFStringEncodingMacKorean, | |
642 | ||
643 | kCFStringEncodingMacRoman, | |
644 | ||
645 | kCFStringEncodingMacJapanese, | |
646 | kCFStringEncodingMacChineseSimp, | |
647 | ||
648 | kCFStringEncodingMacJapanese, | |
649 | kCFStringEncodingMacJapanese, | |
650 | kCFStringEncodingMacJapanese, | |
651 | kCFStringEncodingMacJapanese, | |
652 | kCFStringEncodingMacChineseSimp, | |
653 | kCFStringEncodingMacChineseSimp, | |
654 | kCFStringEncodingMacKorean, | |
655 | kCFStringEncodingMacJapanese, | |
656 | kCFStringEncodingMacChineseSimp, | |
657 | kCFStringEncodingMacChineseTrad, | |
658 | kCFStringEncodingMacKorean, | |
659 | ||
660 | kCFStringEncodingMacJapanese, | |
661 | ||
662 | kCFStringEncodingMacCyrillic, | |
663 | ||
664 | kCFStringEncodingMacChineseTrad, | |
665 | ||
666 | kCFStringEncodingMacRoman, | |
667 | kCFStringEncodingMacChineseSimp, | |
668 | kCFStringEncodingMacChineseTrad, | |
669 | kCFStringEncodingMacVietnamese, | |
670 | kCFStringEncodingMacUkrainian, | |
671 | kCFStringEncodingMacChineseTrad, | |
672 | kCFStringEncodingMacRoman, | |
673 | ||
674 | kCFStringEncodingMacRoman, | |
675 | ||
676 | kCFStringEncodingMacRoman | |
677 | }; | |
678 | ||
// Display names for the ISO 8859 parts, indexed by (part number - 1).
// Part 12 has no name, hence the NULL placeholder.
static const char *__CFISONameList[] = {
    /*  1 */ "Western (ISO Latin 1)",
    /*  2 */ "Central European (ISO Latin 2)",
    /*  3 */ "Western (ISO Latin 3)",
    /*  4 */ "Central European (ISO Latin 4)",
    /*  5 */ "Cyrillic (ISO 8859-5)",
    /*  6 */ "Arabic (ISO 8859-6)",
    /*  7 */ "Greek (ISO 8859-7)",
    /*  8 */ "Hebrew (ISO 8859-8)",
    /*  9 */ "Turkish (ISO Latin 5)",
    /* 10 */ "Nordic (ISO Latin 6)",
    /* 11 */ "Thai (ISO 8859-11)",
    /* 12 */ NULL,
    /* 13 */ "Baltic (ISO Latin 7)",
    /* 14 */ "Celtic (ISO Latin 8)",
    /* 15 */ "Western (ISO Latin 9)",
    /* 16 */ "Romanian (ISO Latin 10)",
};
697 | ||
// Display name for each entry of __CFKnownEncodingList (same index).
// NULL marks an encoding without a display name.
static const char *__CFOtherNameList[] = {
    // kCFStringEncodingMac* entries
    "Western (Mac OS Roman)",
    "Japanese (Mac OS)",
    "Traditional Chinese (Mac OS)",
    "Korean (Mac OS)",
    "Arabic (Mac OS)",
    "Hebrew (Mac OS)",
    "Greek (Mac OS)",
    "Cyrillic (Mac OS)",
    "Devanagari (Mac OS)",
    "Gurmukhi (Mac OS)",
    "Gujarati (Mac OS)",
    "Oriya (Mac OS)",
    "Bengali (Mac OS)",
    "Tamil (Mac OS)",
    "Telugu (Mac OS)",
    "Kannada (Mac OS)",
    "Malayalam (Mac OS)",
    "Sinhalese (Mac OS)",
    "Burmese (Mac OS)",
    "Khmer (Mac OS)",
    "Thai (Mac OS)",
    "Laotian (Mac OS)",
    "Georgian (Mac OS)",
    "Armenian (Mac OS)",
    "Simplified Chinese (Mac OS)",
    "Tibetan (Mac OS)",
    "Mongolian (Mac OS)",
    "Ethiopic (Mac OS)",
    "Central European (Mac OS)",
    "Vietnamese (Mac OS)",
    "Symbol (Mac OS)",
    "Dingbats (Mac OS)",
    "Turkish (Mac OS)",
    "Croatian (Mac OS)",
    "Icelandic (Mac OS)",
    "Romanian (Mac OS)",
    "Celtic (Mac OS)",
    "Gaelic (Mac OS)",
    "Farsi (Mac OS)",
    "Cyrillic (Mac OS Ukrainian)",
    "Inuit (Mac OS)",

    // kCFStringEncodingDOS* entries
    "Latin-US (DOS)",
    "Greek (DOS)",
    "Baltic (DOS)",
    "Western (DOS Latin 1)",
    "Greek (DOS Greek 1)",
    "Central European (DOS Latin 2)",
    "Cyrillic (DOS)",
    "Turkish (DOS)",
    "Portuguese (DOS)",
    "Icelandic (DOS)",
    "Hebrew (DOS)",
    "Canadian French (DOS)",
    "Arabic (DOS)",
    "Nordic (DOS)",
    "Russian (DOS)",
    "Greek (DOS Greek 2)",
    "Thai (Windows, DOS)",
    "Japanese (Windows, DOS)",
    "Simplified Chinese (Windows, DOS)",
    "Korean (Windows, DOS)",
    "Traditional Chinese (Windows, DOS)",

    // kCFStringEncodingWindows* entries and ASCII
    "Western (Windows Latin 1)",
    "Central European (Windows Latin 2)",
    "Cyrillic (Windows)",
    "Greek (Windows)",
    "Turkish (Windows Latin 5)",
    "Hebrew (Windows)",
    "Arabic (Windows)",
    "Baltic (Windows)",
    "Vietnamese (Windows)",
    "Korean (Windows Johab)",
    "Western (ASCII)",

    "Japanese (Shift JIS X0213)",
    "Chinese (GB 18030)",

    // ISO 2022 and EUC families
    "Japanese (ISO 2022-JP)",
    "Japanese (ISO 2022-JP-2)",
    "Japanese (ISO 2022-JP-1)",
    "Japanese (ISO 2022-JP-3)",
    "Chinese (ISO 2022-CN)",
    "Chinese (ISO 2022-CN-EXT)",
    "Korean (ISO 2022-KR)",
    "Japanese (EUC)",
    "Simplified Chinese (GB 2312)",
    "Traditional Chinese (EUC)",
    "Korean (EUC)",

    "Japanese (Shift JIS)",
    "Cyrillic (KOI8-R)",
    "Traditional Chinese (Big 5)",

    "Western (Mac Mail)",
    "Simplified Chinese (HZ GB 2312)",
    "Traditional Chinese (Big 5 HKSCS)",
    NULL,
    "Ukrainian (KOI8-U)",
    "Traditional Chinese (Big 5-E)",
    NULL,

    "Western (NextStep)",
    "Western (EBCDIC Latin 1)",
};
798 | #endif /* DEPLOYMENT_TARGET_MACOSX */ | |
799 | ||
800 | __private_extern__ CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) { | |
801 | #if DEPLOYMENT_TARGET_MACOSX | |
802 | switch (encoding & 0x0F00) { | |
803 | case 0: return encoding & 0xFF; break; // Mac scripts | |
804 | ||
805 | case 0x0100: return kCFStringEncodingUnicode; break; // Unicode | |
806 | ||
807 | case 0x200: // ISO 8859 | |
808 | return (((encoding & 0xFF) <= (sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList))) ? __CFISO8859SimilarScriptList[(encoding & 0xFF) - 1] : kCFStringEncodingInvalidId); | |
809 | break; | |
810 | ||
811 | default: { | |
812 | CFIndex index = __CFGetEncodingIndex(encoding); | |
813 | ||
814 | if (kCFNotFound != index) { | |
815 | index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS); | |
816 | return __CFOtherSimilarScriptList[index]; | |
817 | } | |
818 | } | |
819 | } | |
820 | #endif /* DEPLOYMENT_TARGET_MACOSX */ | |
821 | ||
822 | return kCFStringEncodingInvalidId; | |
823 | } | |
824 | ||
825 | __private_extern__ const char *__CFStringEncodingGetName(CFStringEncoding encoding) { | |
826 | switch (encoding) { | |
827 | case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break; | |
828 | case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break; | |
829 | case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break; | |
830 | case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break; | |
831 | case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break; | |
832 | case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break; | |
833 | case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break; | |
834 | case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break; | |
835 | case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break; | |
836 | } | |
837 | ||
838 | #if DEPLOYMENT_TARGET_MACOSX | |
839 | if (0x0200 == (encoding & 0x0F00)) { | |
840 | encoding &= 0x00FF; | |
841 | ||
842 | if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1]; | |
843 | } else { | |
844 | CFIndex index = __CFGetEncodingIndex(encoding); | |
845 | ||
846 | if (kCFNotFound != index) return __CFOtherNameList[index]; | |
847 | } | |
848 | #endif /* DEPLOYMENT_TARGET_MACOSX */ | |
849 | ||
850 | return NULL; | |
851 | } |