]> git.saurik.com Git - apple/cf.git/blobdiff - CFStringEncodingDatabase.c
CF-550.tar.gz
[apple/cf.git] / CFStringEncodingDatabase.c
diff --git a/CFStringEncodingDatabase.c b/CFStringEncodingDatabase.c
new file mode 100644 (file)
index 0000000..715edd5
--- /dev/null
@@ -0,0 +1,859 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*
+ *  CFStringEncodingDatabase.c
+ *  CoreFoundation
+ *
+ *  Created by Aki Inoue on 07/12/05.
+ *  Copyright 2007-2009, Apple Inc. All rights reserved.
+ *
+ */
+
+#include "CFInternal.h"
+#include <CoreFoundation/CFStringEncodingExt.h>
+#include "CFStringEncodingConverterPriv.h"
+#include "CFStringEncodingDatabase.h"
+#include <stdio.h>
+
+#if DEPLOYMENT_TARGET_WINDOWS
+// Windows shims: strncasecmp_l is mapped to _strnicmp (the locale argument is dropped)
+// and snprintf is routed to _snprintf.
+// NOTE(review): Microsoft documents _snprintf as returning a negative value on
+// truncation and not guaranteeing NUL termination, unlike C99 snprintf - callers
+// that inspect the return value should account for that.
+#define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c)
+#define snprintf _snprintf
+#endif
+
+// Added to the low byte of an ISO 8859 encoding (0x02xx range) to form the matching
+// Windows code page number, and subtracted again when converting back.
+#define ISO8859CODEPAGE_BASE (28590)
+
+// Table of the encodings this file knows about outside the UTF (0x01xx) and
+// ISO 8859 (0x02xx) ranges, which the functions below handle separately.
+// __CFGetEncodingIndex() binary-searches this table, so the entries must stay in
+// ascending numeric order. The position of an entry is also used to index
+// __CFWindowsCPList, __CFCanonicalNameList and (on Mac OS X) __CFOtherNameList and
+// __CFOtherSimilarScriptList, so those tables must be kept in step with this one.
+static const uint16_t __CFKnownEncodingList[] = {
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacChineseTrad,
+    kCFStringEncodingMacKorean,
+    kCFStringEncodingMacArabic,
+    kCFStringEncodingMacHebrew,
+    kCFStringEncodingMacGreek,
+    kCFStringEncodingMacCyrillic,
+    kCFStringEncodingMacDevanagari,
+    kCFStringEncodingMacGurmukhi,
+    kCFStringEncodingMacGujarati,
+    kCFStringEncodingMacOriya,
+    kCFStringEncodingMacBengali,
+    kCFStringEncodingMacTamil,
+    kCFStringEncodingMacTelugu,
+    kCFStringEncodingMacKannada,
+    kCFStringEncodingMacMalayalam,
+    kCFStringEncodingMacSinhalese,
+    kCFStringEncodingMacBurmese,
+    kCFStringEncodingMacKhmer,
+    kCFStringEncodingMacThai,
+    kCFStringEncodingMacLaotian,
+    kCFStringEncodingMacGeorgian,
+    kCFStringEncodingMacArmenian,
+    kCFStringEncodingMacChineseSimp,
+    kCFStringEncodingMacTibetan,
+    kCFStringEncodingMacMongolian,
+    kCFStringEncodingMacEthiopic,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacVietnamese,
+    kCFStringEncodingMacSymbol,
+    kCFStringEncodingMacDingbats,
+    kCFStringEncodingMacTurkish,
+    kCFStringEncodingMacCroatian,
+    kCFStringEncodingMacIcelandic,
+    kCFStringEncodingMacRomanian,
+    kCFStringEncodingMacCeltic,
+    kCFStringEncodingMacGaelic,
+    kCFStringEncodingMacFarsi,
+    kCFStringEncodingMacUkrainian,
+    kCFStringEncodingMacInuit,
+    
+    kCFStringEncodingDOSLatinUS,
+    kCFStringEncodingDOSGreek,
+    kCFStringEncodingDOSBalticRim,
+    kCFStringEncodingDOSLatin1,
+    kCFStringEncodingDOSGreek1,
+    kCFStringEncodingDOSLatin2,
+    kCFStringEncodingDOSCyrillic,
+    kCFStringEncodingDOSTurkish,
+    kCFStringEncodingDOSPortuguese,
+    kCFStringEncodingDOSIcelandic,
+    kCFStringEncodingDOSHebrew,
+    kCFStringEncodingDOSCanadianFrench,
+    kCFStringEncodingDOSArabic,
+    kCFStringEncodingDOSNordic,
+    kCFStringEncodingDOSRussian,
+    kCFStringEncodingDOSGreek2,
+    kCFStringEncodingDOSThai,
+    kCFStringEncodingDOSJapanese,
+    kCFStringEncodingDOSChineseSimplif,
+    kCFStringEncodingDOSKorean,
+    kCFStringEncodingDOSChineseTrad,
+    
+    kCFStringEncodingWindowsLatin1,
+    kCFStringEncodingWindowsLatin2,
+    kCFStringEncodingWindowsCyrillic,
+    kCFStringEncodingWindowsGreek,
+    kCFStringEncodingWindowsLatin5,
+    kCFStringEncodingWindowsHebrew,
+    kCFStringEncodingWindowsArabic,
+    kCFStringEncodingWindowsBalticRim,
+    kCFStringEncodingWindowsVietnamese,
+    kCFStringEncodingWindowsKoreanJohab,
+    kCFStringEncodingASCII,
+    
+    kCFStringEncodingShiftJIS_X0213,
+    kCFStringEncodingGB_18030_2000,
+    
+    kCFStringEncodingISO_2022_JP,
+    kCFStringEncodingISO_2022_JP_2,
+    kCFStringEncodingISO_2022_JP_1,
+    kCFStringEncodingISO_2022_JP_3,
+    kCFStringEncodingISO_2022_CN,
+    kCFStringEncodingISO_2022_CN_EXT,
+    kCFStringEncodingISO_2022_KR,
+    kCFStringEncodingEUC_JP,
+    kCFStringEncodingEUC_CN,
+    kCFStringEncodingEUC_TW,
+    kCFStringEncodingEUC_KR,
+    
+    kCFStringEncodingShiftJIS,
+
+    kCFStringEncodingKOI8_R,
+
+    kCFStringEncodingBig5,
+
+    kCFStringEncodingMacRomanLatin1,
+    kCFStringEncodingHZ_GB_2312,
+    kCFStringEncodingBig5_HKSCS_1999,
+    kCFStringEncodingVISCII,
+    kCFStringEncodingKOI8_U,
+    kCFStringEncodingBig5_E,
+    kCFStringEncodingUTF7_IMAP,
+    
+    kCFStringEncodingNextStepLatin,
+    
+    kCFStringEncodingEBCDIC_CP037
+};
+
+// Windows codepage mapping.
+// Parallel to __CFKnownEncodingList (same order, same length): entry i is the
+// Windows code page number for __CFKnownEncodingList[i], or 0 when no code page
+// is mapped for that encoding.
+static const uint16_t __CFWindowsCPList[] = {
+    10000,
+    10001,
+    10002,
+    10003,
+    10004,
+    10005,
+    10006,
+    10007,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    10021,
+    0,
+    0,
+    0,
+    10008,
+    0,
+    0,
+    0,
+    10029,
+    0,
+    0,
+    0,
+    10081,
+    10082,
+    10079,
+    10010,
+    0,
+    0,
+    0,
+    10017,
+    0,
+    
+    437,
+    737,
+    775,
+    850,
+    851,
+    852,
+    855,
+    857,
+    860,
+    861,
+    862,
+    863,
+    864,
+    865,
+    866,
+    869,
+    874,
+    932,
+    936,
+    949,
+    950,
+    
+    1252,
+    1250,
+    1251,
+    1253,
+    1254,
+    1255,
+    1256,
+    1257,
+    1258,
+    1361,
+
+    20127,
+    
+    0,
+    54936,
+    
+    50221, // we prefer 50221 over the other ISO-2022-JP code pages (50220/50222) since that's what the CF converter generates
+    0,
+    0,
+    0,
+    50227,
+    0,
+    50225,
+    
+    51932,
+    51936,
+    51950,
+    51949,
+    
+    0,
+
+    20866,
+
+    0,
+
+    0,
+    52936,
+    0,
+    0,
+    21866,
+    0,
+    0,
+    
+    0,
+    
+    37
+};
+
+// Canonical name.
+// Parallel to __CFKnownEncodingList (same order, same length): entry i is the
+// canonical name for __CFKnownEncodingList[i], or NULL when this table has no name
+// for it. The DOS and Windows code page entries are NULL here because
+// __CFStringEncodingGetCanonicalName() builds their names ("cp%d" / "windows-%d")
+// from __CFWindowsCPList instead. That function also prepends "x-mac-" to the
+// Mac OS script names (all but "macintosh") and to "roman-latin1".
+static const char *__CFCanonicalNameList[] = {
+    "macintosh",
+    "japanese",
+    "trad-chinese",
+    "korean",
+    "arabic",
+    "hebrew",
+    "greek",
+    "cyrillic",
+    "devanagari",
+    "gurmukhi",
+    "gujarati",
+    "oriya",
+    "bengali",
+    "tamil",
+    "telugu",
+    "kannada",
+    "malayalam",
+    "sinhalese",
+    "burmese",
+    "khmer",
+    "thai",
+    "laotian",
+    "georgian",
+    "armenian",
+    "simp-chinese",
+    "tibetan",
+    "mongolian",
+    "ethiopic",
+    "centraleurroman",
+    "vietnamese",
+    "symbol",
+    "dingbats",
+    "turkish",
+    "croatian",
+    "icelandic",
+    "romanian",
+    "celtic",
+    "gaelic",
+    "farsi",
+    "ukrainian",
+    "inuit",
+    
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    
+    "us-ascii",
+    
+    NULL,
+    "gb18030",
+    
+    "iso-2022-jp",
+    "iso-2022-jp-2",
+    "iso-2022-jp-1",
+    "iso-2022-jp-3",
+    "iso-2022-cn",
+    "iso-2022-cn-ext",
+    "iso-2022-kr",
+    "euc-jp",
+    "gb2312",
+    "euc-tw",
+    "euc-kr",
+    
+    "shift_jis",
+
+    "koi8-r",
+
+    "big5",
+
+    "roman-latin1",
+    "hz-gb-2312",
+    "big5-hkscs",
+    "viscii",
+    "koi8-u",
+    NULL,
+    "utf7-imap",
+    
+    "x-nextstep",
+    
+    "ibm037",
+};
+
+/*
+ * Finds an encoding in __CFKnownEncodingList by binary search.
+ * Only the low 12 bits of the encoding take part in the comparison.
+ * Returns the position of the matching entry, or kCFNotFound when there is none.
+ */
+static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) {
+    const CFStringEncoding key = encoding & 0x0FFF;
+    CFIndex low = 0;
+    CFIndex high = (CFIndex)(sizeof(__CFKnownEncodingList) / sizeof(__CFKnownEncodingList[0])) - 1;
+
+    while (low <= high) {
+        const CFIndex probe = low + ((high - low) >> 1);
+        const uint16_t candidate = __CFKnownEncodingList[probe];
+
+        if (key < candidate) {
+            high = probe - 1; // continue in the lower half
+        } else if (key > candidate) {
+            low = probe + 1; // continue in the upper half
+        } else {
+            return probe;
+        }
+    }
+
+    return kCFNotFound;
+}
+
+/*
+ * Returns the Windows code page number for an encoding, or 0 when none is known.
+ * UTF encodings are mapped explicitly, ISO 8859 encodings are computed from
+ * ISO8859CODEPAGE_BASE plus the low byte, and everything else is looked up in
+ * __CFWindowsCPList.
+ */
+__private_extern__ uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) {
+    switch (encoding & 0x0F00) {
+        case 0x0100: // UTF
+            if (kCFStringEncodingUTF7 == encoding) return 65000;
+            if (kCFStringEncodingUTF8 == encoding) return 65001;
+            if (kCFStringEncodingUTF16 == encoding) return 1200;
+            if (kCFStringEncodingUTF16BE == encoding) return 1201;
+            if (kCFStringEncodingUTF32 == encoding) return 65005;
+            if (kCFStringEncodingUTF32BE == encoding) return 65006;
+            return 0; // remaining UTF variants have no code page here
+
+        case 0x0200: // ISO 8859 range
+            return ISO8859CODEPAGE_BASE + (encoding & 0xFF);
+
+        default: { // others
+            const CFIndex position = __CFGetEncodingIndex(encoding);
+
+            return ((kCFNotFound == position) ? 0 : __CFWindowsCPList[position]);
+        }
+    }
+}
+
+/*
+ * Returns the CFStringEncoding for a Windows code page number, or
+ * kCFStringEncodingInvalidId when the code page is not recognized.
+ * The reverse lookup table for __CFWindowsCPList is built once, under a spin
+ * lock, the first time a code page outside the UTF and ISO 8859 sets is requested.
+ */
+__private_extern__ CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) {
+    static CFMutableDictionaryRef codePageToEncoding = NULL;
+    static CFSpinLock_t tableLock = CFSpinLockInit;
+    uintptr_t found;
+
+    // Code pages answered directly, without consulting the tables.
+    switch (codepage) {
+        case 0: return kCFStringEncodingInvalidId;
+        case 1200: return kCFStringEncodingUTF16;
+        case 1201: return kCFStringEncodingUTF16BE;
+        case 65000: return kCFStringEncodingUTF7;
+        case 65001: return kCFStringEncodingUTF8;
+        case 65005: return kCFStringEncodingUTF32;
+        case 65006: return kCFStringEncodingUTF32BE;
+        default: break;
+    }
+
+    // ISO 8859 parts 1..16 map arithmetically onto the 0x02xx encodings.
+    if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) {
+        return (codepage - ISO8859CODEPAGE_BASE) + 0x0200;
+    }
+
+    __CFSpinLock(&tableLock);
+    if (NULL == codePageToEncoding) {
+        const CFIndex total = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
+        CFIndex slot;
+
+        codePageToEncoding = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
+
+        for (slot = 0; slot < total; slot++) {
+            if (0 == __CFWindowsCPList[slot]) continue; // no code page for this encoding
+
+            CFDictionarySetValue(codePageToEncoding, (const void *)(uintptr_t)__CFWindowsCPList[slot], (const void *)(uintptr_t)__CFKnownEncodingList[slot]);
+        }
+    }
+    __CFSpinUnlock(&tableLock);
+
+    if (!CFDictionaryGetValueIfPresent(codePageToEncoding, (const void *)(uintptr_t)codepage, (const void **)&found)) {
+        return kCFStringEncodingInvalidId;
+    }
+
+    return (CFStringEncoding)found;
+}
+
+/*
+ * Writes the canonical name of an encoding into buffer.
+ *
+ * encoding:   the CFStringEncoding to name.
+ * buffer:     destination for the NUL-terminated name.
+ * bufferSize: capacity of buffer in bytes, including the terminator.
+ *
+ * Returns true when the complete name was written. Returns false when the
+ * encoding has no canonical name, when buffer is NULL or bufferSize is not
+ * positive, or when formatting failed or the name did not fit (in which case
+ * buffer holds a NUL-terminated, possibly truncated string).
+ */
+__private_extern__ bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) {
+    const char *format = "%s";
+    const char *name = NULL;
+    uint32_t value = 0;
+    CFIndex index;
+    int length;
+
+    if ((NULL == buffer) || (bufferSize <= 0)) return false;
+
+    switch (encoding & 0x0F00) {
+        case 0x0100: // UTF range
+            switch (encoding) {
+                case kCFStringEncodingUTF7: name = "utf-7"; break;
+                case kCFStringEncodingUTF8: name = "utf-8"; break;
+                case kCFStringEncodingUTF16: name = "utf-16"; break;
+                case kCFStringEncodingUTF16BE: name = "utf-16be"; break;
+                case kCFStringEncodingUTF16LE: name = "utf-16le"; break;
+                case kCFStringEncodingUTF32: name = "utf-32"; break;
+                case kCFStringEncodingUTF32BE: name = "utf-32be"; break;
+                case kCFStringEncodingUTF32LE: name = "utf-32le"; break;
+            }
+            break;
+
+        case 0x0200: // ISO 8859 range
+            format = "iso-8859-%d";
+            value = (encoding & 0xFF);
+            break;
+
+        case 0x0400: // DOS code page range
+        case 0x0500: // Windows code page range
+            index = __CFGetEncodingIndex(encoding);
+            
+            if (kCFNotFound != index) {
+                // The name is generated from the code page number rather than stored.
+                value = __CFWindowsCPList[index];
+                if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d");
+            }
+            break;
+
+        default: // others
+            index = __CFGetEncodingIndex(encoding);
+
+            if (kCFNotFound != index) {
+                // Mac OS scripts other than MacRoman, plus MacRomanLatin1, carry an "x-mac-" prefix.
+                if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s";
+                name = (const char *)__CFCanonicalNameList[index];
+            }
+            break;
+    }
+
+    if ((0 == value) && (NULL == name)) return false;
+
+    if (0 != value) {
+        length = snprintf(buffer, bufferSize, format, (int)value);
+    } else {
+        length = snprintf(buffer, bufferSize, format, name);
+    }
+
+    // A negative result is a failure, not a short string: the Windows _snprintf shim
+    // reports truncation that way and may leave buffer unterminated, so it must not
+    // be allowed to pass the "< bufferSize" test.
+    if ((length < 0) || (length >= bufferSize)) {
+        buffer[bufferSize - 1] = '\0';
+        return false;
+    }
+
+    return true;
+}
+
+#define LENGTH_LIMIT (256)
+/*
+ * Key-equality callback for the canonical name dictionary: two C strings are
+ * equal when they match case-insensitively over at most LENGTH_LIMIT characters.
+ */
+static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) {
+    const char *left = (const char *)value1;
+    const char *right = (const char *)value2;
+
+    if (0 != strncasecmp_l(left, right, LENGTH_LIMIT, NULL)) return false;
+
+    return true;
+}
+
+/*
+ * Hash callback for the canonical name dictionary. Sums the characters of the
+ * name (at most LENGTH_LIMIT of them) with 'A'..'Z' folded to lower case, then
+ * multiplies the sum by the number of characters consumed, so names differing
+ * only in ASCII case hash alike.
+ */
+static CFHashCode __CFCanonicalNameHash(const void *value) {
+    const char *text = (const char *)value;
+    CFHashCode sum = 0;
+    CFIndex consumed = 0;
+
+    for (; (0 != text[consumed]) && (consumed < LENGTH_LIMIT); consumed++) {
+        int folded = text[consumed];
+
+        if ((text[consumed] >= 'A') && (text[consumed] <= 'Z')) folded += 'a' - 'A';
+
+        sum += folded;
+    }
+
+    return sum * consumed;
+}
+
+/*
+ * Parses the decimal number that follows a recognized name prefix.
+ * Returns the number when it lies in [1, limit] and 0 otherwise (no digits, zero,
+ * negative, or too large), so an out-of-range value can never wrap into a valid
+ * one when it is narrowed later. Characters after the digits are ignored.
+ */
+static CFStringEncoding __CFCanonicalNameParseNumber(const char *digits, long limit) {
+    long number = strtol(digits, NULL, 10);
+
+    return (((number < 1) || (number > limit)) ? 0 : (CFStringEncoding)number);
+}
+
+/*
+ * Returns the CFStringEncoding for a canonical name, compared case-insensitively,
+ * or kCFStringEncodingInvalidId when the name is NULL or not recognized.
+ *
+ * "iso-8859-N" (N in 1..16), "cpN" and "windows-N" are resolved numerically;
+ * every other name is looked up in a dictionary built once, under a spin lock,
+ * from the UTF names and __CFCanonicalNameList. An optional "x-mac-" prefix is
+ * skipped before that lookup.
+ */
+__private_extern__ CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) {
+    CFStringEncoding encoding;
+    CFIndex prefixLength;
+    static CFMutableDictionaryRef mappingTable = NULL;
+    static CFSpinLock_t lock = CFSpinLockInit;
+
+    if (NULL == canonicalName) return kCFStringEncodingInvalidId;
+
+    prefixLength = strlen("iso-8859-");
+    if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO
+        encoding = __CFCanonicalNameParseNumber(canonicalName + prefixLength, 16);
+
+        return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding + 0x0200);
+    }
+
+    prefixLength = strlen("cp");
+    if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS
+        // Limited to the uint16_t range the code page lookup accepts; 0 maps to invalid there.
+        encoding = __CFCanonicalNameParseNumber(canonicalName + prefixLength, 0xFFFF);
+
+        return __CFStringEncodingGetFromWindowsCodePage((uint16_t)encoding);
+    }
+
+    prefixLength = strlen("windows-");
+    if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do Windows
+        encoding = __CFCanonicalNameParseNumber(canonicalName + prefixLength, 0xFFFF);
+        
+        return __CFStringEncodingGetFromWindowsCodePage((uint16_t)encoding);
+    }
+    
+    __CFSpinLock(&lock);
+    if (NULL == mappingTable) {
+        CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
+
+        // Keys are plain C strings compared and hashed case-insensitively; nothing is retained.
+        CFDictionaryKeyCallBacks keys = {
+            0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash
+        };
+
+        mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL);
+
+        // Add UTFs
+        CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7);
+        CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8);
+        CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16);
+        CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE);
+        CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE);
+        CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32);
+        CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE);
+        CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE);
+
+        for (index = 0;index < count;index++) {
+            if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
+        }
+    }
+    __CFSpinUnlock(&lock);
+
+    // Answered before the dictionary lookup because a stored value of 0 is treated
+    // as "not found" below (presumably kCFStringEncodingMacRoman is 0 - confirm).
+    if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman;
+
+    
+    prefixLength = strlen("x-mac-");
+    encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0));
+
+    return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding);
+}
+#undef LENGTH_LIMIT
+
+#if DEPLOYMENT_TARGET_MACOSX
+// Mac OS script used as the closest match for each ISO 8859 encoding.
+// __CFStringEncodingGetMostCompatibleMacScript() indexes it with (encoding & 0xFF) - 1.
+// NOTE(review): the comment previously here read "This list indexes from DOS range";
+// that describes how __CFOtherSimilarScriptList is indexed, not this table.
+static uint16_t __CFISO8859SimilarScriptList[] = {
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacCyrillic,
+    kCFStringEncodingMacArabic,
+    kCFStringEncodingMacGreek,
+    kCFStringEncodingMacHebrew,
+    kCFStringEncodingMacTurkish,
+    kCFStringEncodingMacInuit,
+    kCFStringEncodingMacThai,
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacCeltic,
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacRomanian};
+
+// Mac OS script used as the closest match for the entries of __CFKnownEncodingList
+// from kCFStringEncodingDOSLatinUS onwards. __CFStringEncodingGetMostCompatibleMacScript()
+// indexes it with (index in __CFKnownEncodingList) - (index of kCFStringEncodingDOSLatinUS),
+// so the first entry here belongs to kCFStringEncodingDOSLatinUS.
+static uint16_t __CFOtherSimilarScriptList[] = {
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacGreek,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacGreek,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacCyrillic,
+    kCFStringEncodingMacTurkish,
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacIcelandic,
+    kCFStringEncodingMacHebrew,
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacArabic,
+    kCFStringEncodingMacInuit,
+    kCFStringEncodingMacCyrillic,
+    kCFStringEncodingMacGreek,
+    kCFStringEncodingMacThai,
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacChineseSimp,
+    kCFStringEncodingMacKorean,
+    kCFStringEncodingMacChineseTrad,
+    
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacCyrillic,
+    kCFStringEncodingMacGreek,
+    kCFStringEncodingMacTurkish,
+    kCFStringEncodingMacHebrew,
+    kCFStringEncodingMacArabic,
+    kCFStringEncodingMacCentralEurRoman,
+    kCFStringEncodingMacVietnamese,
+    kCFStringEncodingMacKorean,
+
+    kCFStringEncodingMacRoman,
+
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacChineseSimp,
+    
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacChineseSimp,
+    kCFStringEncodingMacChineseSimp,
+    kCFStringEncodingMacKorean,
+    kCFStringEncodingMacJapanese,
+    kCFStringEncodingMacChineseSimp,
+    kCFStringEncodingMacChineseTrad,
+    kCFStringEncodingMacKorean,
+
+    kCFStringEncodingMacJapanese,
+
+    kCFStringEncodingMacCyrillic,
+
+    kCFStringEncodingMacChineseTrad,
+
+    kCFStringEncodingMacRoman,
+    kCFStringEncodingMacChineseSimp,
+    kCFStringEncodingMacChineseTrad,
+    kCFStringEncodingMacVietnamese,
+    kCFStringEncodingMacUkrainian,
+    kCFStringEncodingMacChineseTrad,
+    kCFStringEncodingMacRoman,
+    
+    kCFStringEncodingMacRoman,
+    
+    kCFStringEncodingMacRoman
+};
+
+// Display names for the ISO 8859 encodings. __CFStringEncodingGetName() indexes
+// this table with (encoding & 0xFF) - 1; a NULL entry means no display name is
+// provided for that slot.
+static const char *__CFISONameList[] = {
+    "Western (ISO Latin 1)",
+    "Central European (ISO Latin 2)",
+    "Western (ISO Latin 3)",
+    "Central European (ISO Latin 4)",
+    "Cyrillic (ISO 8859-5)",
+    "Arabic (ISO 8859-6)",
+    "Greek (ISO 8859-7)",
+    "Hebrew (ISO 8859-8)",
+    "Turkish (ISO Latin 5)",
+    "Nordic (ISO Latin 6)",
+    "Thai (ISO 8859-11)",
+    NULL,
+    "Baltic (ISO Latin 7)",
+    "Celtic (ISO Latin 8)",
+    "Western (ISO Latin 9)",
+    "Romanian (ISO Latin 10)",
+};
+
+// Display names parallel to __CFKnownEncodingList (same order, same length):
+// __CFStringEncodingGetName() returns entry i for __CFKnownEncodingList[i].
+// A NULL entry means no display name is provided for that encoding.
+static const char *__CFOtherNameList[] = {
+    "Western (Mac OS Roman)",
+    "Japanese (Mac OS)",
+    "Traditional Chinese (Mac OS)",
+    "Korean (Mac OS)",
+    "Arabic (Mac OS)",
+    "Hebrew (Mac OS)",
+    "Greek (Mac OS)",
+    "Cyrillic (Mac OS)",
+    "Devanagari (Mac OS)",
+    "Gurmukhi (Mac OS)",
+    "Gujarati (Mac OS)",
+    "Oriya (Mac OS)",
+    "Bengali (Mac OS)",
+    "Tamil (Mac OS)",
+    "Telugu (Mac OS)",
+    "Kannada (Mac OS)",
+    "Malayalam (Mac OS)",
+    "Sinhalese (Mac OS)",
+    "Burmese (Mac OS)",
+    "Khmer (Mac OS)",
+    "Thai (Mac OS)",
+    "Laotian (Mac OS)",
+    "Georgian (Mac OS)",
+    "Armenian (Mac OS)",
+    "Simplified Chinese (Mac OS)",
+    "Tibetan (Mac OS)",
+    "Mongolian (Mac OS)",
+    "Ethiopic (Mac OS)",
+    "Central European (Mac OS)",
+    "Vietnamese (Mac OS)",
+    "Symbol (Mac OS)",
+    "Dingbats (Mac OS)",
+    "Turkish (Mac OS)",
+    "Croatian (Mac OS)",
+    "Icelandic (Mac OS)",
+    "Romanian (Mac OS)",
+    "Celtic (Mac OS)",
+    "Gaelic (Mac OS)",
+    "Farsi (Mac OS)",
+    "Cyrillic (Mac OS Ukrainian)",
+    "Inuit (Mac OS)",
+    "Latin-US (DOS)",
+    "Greek (DOS)",
+    "Baltic (DOS)",
+    "Western (DOS Latin 1)",
+    "Greek (DOS Greek 1)",
+    "Central European (DOS Latin 2)",
+    "Cyrillic (DOS)",
+    "Turkish (DOS)",
+    "Portuguese (DOS)",
+    "Icelandic (DOS)",
+    "Hebrew (DOS)",
+    "Canadian French (DOS)",
+    "Arabic (DOS)",
+    "Nordic (DOS)",
+    "Russian (DOS)",
+    "Greek (DOS Greek 2)",
+    "Thai (Windows, DOS)",
+    "Japanese (Windows, DOS)",
+    "Simplified Chinese (Windows, DOS)",
+    "Korean (Windows, DOS)",
+    "Traditional Chinese (Windows, DOS)",
+    "Western (Windows Latin 1)",
+    "Central European (Windows Latin 2)",
+    "Cyrillic (Windows)",
+    "Greek (Windows)",
+    "Turkish (Windows Latin 5)",
+    "Hebrew (Windows)",
+    "Arabic (Windows)",
+    "Baltic (Windows)",
+    "Vietnamese (Windows)",
+    "Korean (Windows Johab)",
+    "Western (ASCII)",
+    "Japanese (Shift JIS X0213)",
+    "Chinese (GB 18030)",
+    "Japanese (ISO 2022-JP)",
+    "Japanese (ISO 2022-JP-2)",
+    "Japanese (ISO 2022-JP-1)",
+    "Japanese (ISO 2022-JP-3)",
+    "Chinese (ISO 2022-CN)",
+    "Chinese (ISO 2022-CN-EXT)",
+    "Korean (ISO 2022-KR)",
+    "Japanese (EUC)",
+    "Simplified Chinese (GB 2312)",
+    "Traditional Chinese (EUC)",
+    "Korean (EUC)",
+    "Japanese (Shift JIS)",
+    "Cyrillic (KOI8-R)",
+    "Traditional Chinese (Big 5)",
+    "Western (Mac Mail)",
+    "Simplified Chinese (HZ GB 2312)",
+    "Traditional Chinese (Big 5 HKSCS)",
+    NULL,
+    "Ukrainian (KOI8-U)",
+    "Traditional Chinese (Big 5-E)",
+    NULL,
+    "Western (NextStep)",
+    "Western (EBCDIC Latin 1)",
+};
+#endif /* DEPLOYMENT_TARGET_MACOSX */
+
+/*
+ * Returns the Mac OS script encoding considered most compatible with encoding.
+ *
+ * Mac OS script encodings map to themselves (low byte), the UTF range maps to
+ * kCFStringEncodingUnicode, ISO 8859 encodings are looked up in
+ * __CFISO8859SimilarScriptList, and all other known encodings in
+ * __CFOtherSimilarScriptList. Returns kCFStringEncodingInvalidId for unknown
+ * encodings, and always when not built with DEPLOYMENT_TARGET_MACOSX.
+ */
+__private_extern__ CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) {
+#if DEPLOYMENT_TARGET_MACOSX
+    switch (encoding & 0x0F00) {
+        case 0: return encoding & 0xFF; // Mac scripts
+
+        case 0x0100: return kCFStringEncodingUnicode; // Unicode
+
+        case 0x200: { // ISO 8859
+            const CFIndex part = (CFIndex)(encoding & 0xFF);
+            const CFIndex count = (CFIndex)(sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList));
+
+            // The table is indexed by part - 1, so a low byte of 0 must be rejected
+            // as well; checking only the upper bound would read before the table.
+            return (((part >= 1) && (part <= count)) ? __CFISO8859SimilarScriptList[part - 1] : kCFStringEncodingInvalidId);
+        }
+
+        default: {
+            CFIndex index = __CFGetEncodingIndex(encoding);
+
+            if (kCFNotFound != index) {
+                const CFIndex count = (CFIndex)(sizeof(__CFOtherSimilarScriptList) / sizeof(*__CFOtherSimilarScriptList));
+
+                // The table starts at the kCFStringEncodingDOSLatinUS entry of __CFKnownEncodingList.
+                index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS);
+                if ((index >= 0) && (index < count)) return __CFOtherSimilarScriptList[index];
+            }
+            break;
+        }
+    }
+#endif /* DEPLOYMENT_TARGET_MACOSX */
+
+    return kCFStringEncodingInvalidId;
+}
+
+__private_extern__ const char *__CFStringEncodingGetName(CFStringEncoding encoding) {
+    switch (encoding) {
+        case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break;
+        case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break;
+        case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break;
+        case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break;
+        case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break;
+        case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break;
+        case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break;
+        case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break;
+        case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break;
+    }
+
+#if DEPLOYMENT_TARGET_MACOSX
+    if (0x0200 == (encoding & 0x0F00)) {
+        encoding &= 0x00FF;
+
+        if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1];
+    } else {
+        CFIndex index = __CFGetEncodingIndex(encoding);
+
+        if (kCFNotFound != index) return __CFOtherNameList[index];
+    }
+#endif /* DEPLOYMENT_TARGET_MACOSX */
+    
+    return NULL;
+}