+++ /dev/null
-/*
- * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
- */
-/* CFStringUtilities.c
- Copyright 1999-2002, Apple, Inc. All rights reserved.
- Responsibility: Aki Inoue
-*/
-
-#include "CFInternal.h"
-#include "CFStringEncodingConverterExt.h"
-#include "CFUniChar.h"
-#include <CoreFoundation/CFStringEncodingExt.h>
-#include <limits.h>
-#if defined(__MACH__) || defined(__LINUX__)
-#include <stdlib.h>
-#elif defined(__WIN32__)
-#include <stdlib.h>
-#include <tchar.h>
-#endif
-
-
-/* Reports whether theEncoding can be used. The encodings named in the switch
- * are answered with true directly; any other value is handed to
- * CFStringEncodingIsValidEncoding() and its answer is returned unchanged.
- */
-Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
-    Boolean isBuiltIn;
-
-    switch (theEncoding) {
-        case kCFStringEncodingASCII:
-        case kCFStringEncodingMacRoman:
-        case kCFStringEncodingUTF8:
-        case kCFStringEncodingNonLossyASCII:
-        case kCFStringEncodingWindowsLatin1:
-        case kCFStringEncodingNextStepLatin:
-        case kCFStringEncodingUTF16:
-        case kCFStringEncodingUTF16BE:
-        case kCFStringEncodingUTF16LE:
-        case kCFStringEncodingUTF32:
-        case kCFStringEncodingUTF32BE:
-        case kCFStringEncodingUTF32LE:
-            isBuiltIn = true;
-            break;
-
-        default:
-            isBuiltIn = false;
-            break;
-    }
-
-    if (isBuiltIn) return true;
-
-    // Not one of the built-in encodings: defer to the converter layer
-    return CFStringEncodingIsValidEncoding(theEncoding);
-}
-
-/* Returns the list produced by CFStringEncodingListOfAvailableEncodings().
- * Code in this file walks the list until it reaches kCFStringEncodingInvalidId.
- */
-const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
-    const CFStringEncoding *availableEncodings = CFStringEncodingListOfAvailableEncodings();
-
-    return availableEncodings;
-}
-
-/* Returns a display name for theEncoding, or NULL when none can be produced.
- *
- * Names are cached in a function-static dictionary keyed by the encoding value
- * (NULL key callbacks, CFType value callbacks), so the returned string is kept
- * alive by the cache and the caller does not release it.
- */
-CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
-    static CFMutableDictionaryRef mappingTable = NULL;
-    CFStringRef displayName = NULL;
-
-    // Fast path: the name was already computed by an earlier call
-    if (mappingTable) displayName = CFDictionaryGetValue(mappingTable, (const void*)theEncoding);
-    if (displayName) return displayName;
-
-    switch (theEncoding) {
-        case kCFStringEncodingUTF8:
-            displayName = CFSTR("Unicode (UTF-8)");
-            break;
-        case kCFStringEncodingUTF16:
-            displayName = CFSTR("Unicode (UTF-16)");
-            break;
-        case kCFStringEncodingUTF16BE:
-            displayName = CFSTR("Unicode (UTF-16BE)");
-            break;
-        case kCFStringEncodingUTF16LE:
-            displayName = CFSTR("Unicode (UTF-16LE)");
-            break;
-        case kCFStringEncodingUTF32:
-            displayName = CFSTR("Unicode (UTF-32)");
-            break;
-        case kCFStringEncodingUTF32BE:
-            displayName = CFSTR("Unicode (UTF-32BE)");
-            break;
-        case kCFStringEncodingUTF32LE:
-            displayName = CFSTR("Unicode (UTF-32LE)");
-            break;
-        case kCFStringEncodingNonLossyASCII:
-            displayName = CFSTR("Non-lossy ASCII");
-            break;
-
-        default: {
-            // Everything else: ask the converter layer for its C-string name
-            const uint8_t *rawName = CFStringEncodingName(theEncoding);
-
-            if (rawName) displayName = CFStringCreateWithCString(NULL, rawName, kCFStringEncodingASCII);
-            break;
-        }
-    }
-
-    if (!displayName) return NULL;
-
-    if (!mappingTable) {
-        mappingTable = CFDictionaryCreateMutable(NULL, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
-    }
-
-    // The dictionary retains the value; drop the local reference afterwards
-    CFDictionaryAddValue(mappingTable, (const void*)theEncoding, (const void*)displayName);
-    CFRelease(displayName);
-
-    return displayName;
-}
-
-/* Maps a charset name to a CFStringEncoding, comparing case-insensitively.
- *
- * "utf-8" and "iso-8859-1" are answered without touching the lookup table.
- * Otherwise a lowercase copy of charsetName is looked up in a function-static
- * dictionary that is built on first use from the canonical charset names of
- * every available encoding plus a fixed set of Unicode names. Returns
- * kCFStringEncodingInvalidId when the name is not in the table.
- */
-CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
-    static CFMutableDictionaryRef mappingTable = NULL;
-    CFStringEncoding result = kCFStringEncodingInvalidId;
-    CFMutableStringRef loweredName;
-
-    /* Common encodings are resolved up front */
-    if (CFStringCompare(charsetName, CFSTR("utf-8"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) return kCFStringEncodingUTF8;
-    if (CFStringCompare(charsetName, CFSTR("iso-8859-1"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) return kCFStringEncodingISOLatin1;
-
-    /* Table keys are lowercase, so look up a lowercase copy */
-    loweredName = CFStringCreateMutableCopy(NULL, 0, charsetName);
-    CFStringLowercase(loweredName, NULL);
-
-    if (mappingTable == NULL) {
-        CFMutableDictionaryRef freshTable = CFDictionaryCreateMutable(NULL, 0, &kCFTypeDictionaryKeyCallBacks, (const CFDictionaryValueCallBacks *)NULL);
-        const CFStringEncoding *cursor;
-
-        for (cursor = CFStringGetListOfAvailableEncodings(); *cursor != kCFStringEncodingInvalidId; cursor++) {
-            const char **aliases = CFStringEncodingCanonicalCharsetNames(*cursor);
-
-            if (!aliases) continue;
-
-            for (; *aliases; aliases++) {
-                CFStringRef alias = CFStringCreateWithCString(NULL, *aliases, kCFStringEncodingASCII);
-
-                if (!alias) continue;
-
-                // AddValue keeps the first encoding registered for a given name
-                CFDictionaryAddValue(freshTable, (const void*)alias, (const void*)*cursor);
-                CFRelease(alias);
-            }
-        }
-
-        // Unicode names are appended after the converter-provided ones
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("unicode-1-1"), (const void*)kCFStringEncodingUTF16);
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("iso-10646-ucs-2"), (const void*)kCFStringEncodingUTF16);
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("utf-16"), (const void*)kCFStringEncodingUTF16);
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("utf-16be"), (const void*)kCFStringEncodingUTF16BE);
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("utf-16le"), (const void*)kCFStringEncodingUTF16LE);
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("utf-32"), (const void*)kCFStringEncodingUTF32);
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("utf-32be"), (const void*)kCFStringEncodingUTF32BE);
-        CFDictionaryAddValue(freshTable, (const void*)CFSTR("utf-32le"), (const void*)kCFStringEncodingUTF32LE);
-
-        mappingTable = freshTable;
-    }
-
-    // ContainsKey is required: a stored encoding value of 0 is indistinguishable
-    // from "missing" when only CFDictionaryGetValue() is consulted
-    if (CFDictionaryContainsKey(mappingTable, (const void*)loweredName)) {
-        result = (CFStringEncoding)CFDictionaryGetValue(mappingTable, (const void*)loweredName);
-    }
-
-    CFRelease(loweredName);
-
-    return result;
-}
-
-/* Returns an upper-case charset name for encoding, or NULL when none is known.
- *
- * The UTF-16/UTF-32 family uses fixed names; any other encoding uses the first
- * entry from CFStringEncodingCanonicalCharsetNames(), upper-cased. Results are
- * cached in a function-static dictionary keyed by the encoding value, which
- * keeps the returned string alive.
- */
-CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
-    static CFMutableDictionaryRef mappingTable = NULL;
-    CFStringRef charsetName = NULL;
-
-    // Fast path: served from the cache
-    if (mappingTable) charsetName = (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)encoding);
-    if (charsetName) return charsetName;
-
-    switch (encoding) {
-        case kCFStringEncodingUTF16:
-            charsetName = CFSTR("UTF-16");
-            break;
-        case kCFStringEncodingUTF16BE:
-            charsetName = CFSTR("UTF-16BE");
-            break;
-        case kCFStringEncodingUTF16LE:
-            charsetName = CFSTR("UTF-16LE");
-            break;
-        case kCFStringEncodingUTF32:
-            charsetName = CFSTR("UTF-32");
-            break;
-        case kCFStringEncodingUTF32BE:
-            charsetName = CFSTR("UTF-32BE");
-            break;
-        case kCFStringEncodingUTF32LE:
-            charsetName = CFSTR("UTF-32LE");
-            break;
-
-        default: {
-            const char **canonicalNames = CFStringEncodingCanonicalCharsetNames(encoding);
-
-            if (canonicalNames && *canonicalNames) {
-                CFStringRef firstName = CFStringCreateWithCString(NULL, *canonicalNames, kCFStringEncodingASCII);
-
-                if (firstName) {
-                    // Replace the as-provided name with an upper-cased copy
-                    CFMutableStringRef upperCased = CFStringCreateMutableCopy(NULL, 0, firstName);
-
-                    CFStringUppercase(upperCased, NULL);
-                    CFRelease(firstName);
-                    charsetName = upperCased;
-                }
-            }
-            break;
-        }
-    }
-
-    if (!charsetName) return NULL;
-
-    if (!mappingTable) {
-        mappingTable = CFDictionaryCreateMutable(NULL, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
-    }
-
-    // The dictionary retains the value; drop the local reference afterwards
-    CFDictionaryAddValue(mappingTable, (const void*)encoding, (const void*)charsetName);
-    CFRelease(charsetName);
-
-    return charsetName;
-}
-
-enum { /* NSString encoding numbers used by the CF <-> NSString encoding conversion functions in this file */
- NSASCIIStringEncoding = 1, /* 0..127 only; paired with kCFStringEncodingASCII */
- NSNEXTSTEPStringEncoding = 2, /* paired with kCFStringEncodingNextStepLatin */
- NSJapaneseEUCStringEncoding = 3, /* paired with kCFStringEncodingEUC_JP */
- NSUTF8StringEncoding = 4, /* paired with kCFStringEncodingUTF8 */
- NSISOLatin1StringEncoding = 5, /* paired with kCFStringEncodingISOLatin1 */
- NSSymbolStringEncoding = 6, /* paired with kCFStringEncodingMacSymbol */
- NSNonLossyASCIIStringEncoding = 7, /* paired with kCFStringEncodingNonLossyASCII */
- NSShiftJISStringEncoding = 8, /* paired with kCFStringEncodingDOSJapanese */
- NSISOLatin2StringEncoding = 9, /* paired with kCFStringEncodingISOLatin2 */
- NSUnicodeStringEncoding = 10, /* paired with kCFStringEncodingUTF16 */
- NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
- NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
- NSWindowsCP1253StringEncoding = 13, /* Greek */
- NSWindowsCP1254StringEncoding = 14, /* Turkish */
- NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
- NSISO2022JPStringEncoding = 21, /* ISO 2022 Japanese encoding for e-mail */
- NSMacOSRomanStringEncoding = 30, /* paired with kCFStringEncodingMacRoman */
-
- NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
-};
-
-/* Flag bit marking an NSString encoding value that carries a raw
- * CFStringEncoding in its remaining bits. The literal is unsigned because
- * shifting a signed 1 into bit 31 is undefined behavior for a 32-bit int.
- */
-#define NSENCODING_MASK (1U << 31)
-
-/* Converts a CFStringEncoding to the matching NSString encoding number.
- *
- * Encodings with a dedicated NSString constant return that constant. Every
- * other encoding is returned as its own value with NSENCODING_MASK set, which
- * CFStringConvertNSStringEncodingToEncoding() undoes by clearing the bit.
- * The switch works on the low 12 bits, so the Unicode case compares the full
- * value to tell UTF-16 and UTF-8 apart from the other Unicode forms.
- */
-UInt32 CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
-    switch (theEncoding & 0xFFF) {
-        case kCFStringEncodingASCII: return NSASCIIStringEncoding;
-        case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
-        case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
-        case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
-        case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
-        case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
-#if defined(__MACH__)
-        case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
-        case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
-        case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
-        case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
-        case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
-        case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
-        case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
-        case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
-        case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
-        case kCFStringEncodingUnicode:
-            if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
-            else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
-#endif // __MACH__
-            /* fall-through for other encoding schemes */
-
-        default:
-            // No dedicated NSString constant: tag the CF value itself
-            return NSENCODING_MASK | theEncoding;
-    }
-}
-
-/* Converts an NSString encoding number back to a CFStringEncoding.
- *
- * Known NSString constants map to their CF counterparts. Any other value is
- * accepted only if NSENCODING_MASK is set, in which case the bit is cleared
- * and the rest is returned; otherwise the result is kCFStringEncodingInvalidId.
- */
-CFStringEncoding CFStringConvertNSStringEncodingToEncoding(UInt32 theEncoding) {
-    CFStringEncoding cfEncoding;
-
-    switch (theEncoding) {
-        case NSASCIIStringEncoding: cfEncoding = kCFStringEncodingASCII; break;
-        case NSNEXTSTEPStringEncoding: cfEncoding = kCFStringEncodingNextStepLatin; break;
-        case NSUTF8StringEncoding: cfEncoding = kCFStringEncodingUTF8; break;
-        case NSISOLatin1StringEncoding: cfEncoding = kCFStringEncodingISOLatin1; break;
-        case NSNonLossyASCIIStringEncoding: cfEncoding = kCFStringEncodingNonLossyASCII; break;
-        case NSUnicodeStringEncoding: cfEncoding = kCFStringEncodingUTF16; break;
-        case NSWindowsCP1252StringEncoding: cfEncoding = kCFStringEncodingWindowsLatin1; break;
-        case NSMacOSRomanStringEncoding: cfEncoding = kCFStringEncodingMacRoman; break;
-#if defined(__MACH__)
-        case NSSymbolStringEncoding: cfEncoding = kCFStringEncodingMacSymbol; break;
-        case NSJapaneseEUCStringEncoding: cfEncoding = kCFStringEncodingEUC_JP; break;
-        case NSShiftJISStringEncoding: cfEncoding = kCFStringEncodingDOSJapanese; break;
-        case NSISO2022JPStringEncoding: cfEncoding = kCFStringEncodingISO_2022_JP; break;
-        case NSISOLatin2StringEncoding: cfEncoding = kCFStringEncodingISOLatin2; break;
-        case NSWindowsCP1251StringEncoding: cfEncoding = kCFStringEncodingWindowsCyrillic; break;
-        case NSWindowsCP1253StringEncoding: cfEncoding = kCFStringEncodingWindowsGreek; break;
-        case NSWindowsCP1254StringEncoding: cfEncoding = kCFStringEncodingWindowsLatin5; break;
-        case NSWindowsCP1250StringEncoding: cfEncoding = kCFStringEncodingWindowsLatin2; break;
-#endif // __MACH__
-        default:
-            // Tagged values carry the CF encoding in the remaining bits
-            if (theEncoding & NSENCODING_MASK) {
-                cfEncoding = theEncoding & ~NSENCODING_MASK;
-            } else {
-                cfEncoding = kCFStringEncodingInvalidId;
-            }
-            break;
-    }
-
-    return cfEncoding;
-}
-
-#define MACCODEPAGE_BASE (10000)
-#define ISO8859CODEPAGE_BASE (28590)
-
-/* Marker for DOS table slots that have no codepage (what -1 becomes in a uint16_t) */
-#define _CF_NO_CODEPAGE (0xFFFF)
-
-/* Codepage for each DOS encoding, indexed by (encoding - 0x400) */
-static const uint16_t _CFToDOSCodePageList[] = {
-    437, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, 737, 775, _CF_NO_CODEPAGE,
-    _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, // 0x400
-    850, 851, 852, 855, 857, 860, 861, 862,
-    863, 864, 865, 866, 869, 874, _CF_NO_CODEPAGE, _CF_NO_CODEPAGE, // 0x410 (last slot previously read "01", a typo for -1)
-    932, 936, 949, 950, // 0x420
-};
-
-/* Codepage for each Windows (ANSI) encoding, indexed by (encoding - 0x500) */
-static const uint16_t _CFToWindowsCodePageList[] = {
-    1252, 1250, 1251, 1253, 1254, 1255, 1256, 1257, 1258,
-};
-
-/* Converts a CFStringEncoding to a Windows codepage number.
- *
- * The encoding family is selected from bits 8-11 of theEncoding; within a
- * family the low 12 bits (encodingBase) pick the codepage. Only the Unicode
- * family is handled when __MACH__ is not defined. Returns
- * kCFStringEncodingInvalidId when no codepage is known for the encoding.
- */
-UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
-#if defined(__MACH__)
-    CFStringEncoding encodingBase = theEncoding & 0x0FFF;
-#endif
-
-    switch (theEncoding & 0x0F00) {
-#if defined(__MACH__)
-        case 0: // Mac OS script
-            if (encodingBase <= kCFStringEncodingMacCentralEurRoman) {
-                return MACCODEPAGE_BASE + encodingBase;
-            } else if (encodingBase == kCFStringEncodingMacTurkish) {
-                return 10081;
-            } else if (encodingBase == kCFStringEncodingMacCroatian) {
-                return 10082;
-            } else if (encodingBase == kCFStringEncodingMacIcelandic) {
-                return 10079;
-            }
-            break;
-#endif
-
-        case 0x100: // Unicode
-            switch (theEncoding) {
-                case kCFStringEncodingUTF8: return 65001;
-                case kCFStringEncodingUTF16: return 1200;
-                case kCFStringEncodingUTF16BE: return 1201;
-                case kCFStringEncodingUTF32: return 65005;
-                case kCFStringEncodingUTF32BE: return 65006;
-            }
-            break;
-
-#if defined(__MACH__)
-        case 0x0200: // ISO 8859 series
-            if (encodingBase <= kCFStringEncodingISOLatin10) return ISO8859CODEPAGE_BASE + (encodingBase - 0x200);
-            break;
-
-        case 0x0400: // DOS codepage
-            if (encodingBase <= kCFStringEncodingDOSChineseTrad) {
-                uint16_t dosCodepage = _CFToDOSCodePageList[encodingBase - 0x400];
-
-                // Unassigned slots must not leak out as codepage 65535
-                if (dosCodepage != _CF_NO_CODEPAGE) return dosCodepage;
-            }
-            break;
-
-        case 0x0500: // ANSI (Windows) codepage
-            // Index with encodingBase (the value the guard checked), not the
-            // full encoding, so bits above the low 12 cannot push the index
-            // out of the table
-            if (encodingBase <= kCFStringEncodingWindowsVietnamese) return _CFToWindowsCodePageList[encodingBase - 0x500];
-            else if (encodingBase == kCFStringEncodingWindowsKoreanJohab) return 1361;
-            break;
-
-        case 0x600: // National standards
-            if (encodingBase == kCFStringEncodingASCII) return 20127;
-            else if (encodingBase == kCFStringEncodingGB_18030_2000) return 54936;
-            break;
-
-        case 0x0800: // ISO 2022 series
-            switch (encodingBase) {
-                case kCFStringEncodingISO_2022_JP: return 50220;
-                case kCFStringEncodingISO_2022_CN: return 50227;
-                case kCFStringEncodingISO_2022_KR: return 50225;
-            }
-            break;
-
-        case 0x0900: // EUC series
-            // The EUC constants are not consecutive, so they cannot be used
-            // as offsets into a packed table; match each one explicitly
-            switch (encodingBase) {
-                case kCFStringEncodingEUC_JP: return 51932;
-                case kCFStringEncodingEUC_CN: return 51936;
-                case kCFStringEncodingEUC_TW: return 51950;
-                case kCFStringEncodingEUC_KR: return 51949;
-            }
-            break;
-
-        case 0x0A00: // Misc encodings
-            switch (encodingBase) {
-                case kCFStringEncodingKOI8_R: return 20866;
-                case kCFStringEncodingHZ_GB_2312: return 52936;
-                case kCFStringEncodingKOI8_U: return 21866;
-            }
-            break;
-
-        case 0x0C00: // IBM EBCDIC encodings
-            if (encodingBase == kCFStringEncodingEBCDIC_CP037) return 37;
-            break;
-#endif // __MACH__
-    }
-
-    return kCFStringEncodingInvalidId;
-}
-
-#if defined(__MACH__)
-/* Windows codepage -> CFStringEncoding pairs.
- * Must stay sorted by ascending acp: bsearchEncoding() binary-searches it.
- */
-static const struct {
-    uint16_t acp;
-    uint16_t encoding;
-} _CFACPToCFTable[] = {
-    {37, kCFStringEncodingEBCDIC_CP037},
-    {437, kCFStringEncodingDOSLatinUS},
-    {737, kCFStringEncodingDOSGreek},
-    {775, kCFStringEncodingDOSBalticRim},
-    {850, kCFStringEncodingDOSLatin1},
-    {851, kCFStringEncodingDOSGreek1},
-    {852, kCFStringEncodingDOSLatin2},
-    {855, kCFStringEncodingDOSCyrillic},
-    {857, kCFStringEncodingDOSTurkish},
-    {860, kCFStringEncodingDOSPortuguese},
-    {861, kCFStringEncodingDOSIcelandic},
-    {862, kCFStringEncodingDOSHebrew},
-    {863, kCFStringEncodingDOSCanadianFrench},
-    {864, kCFStringEncodingDOSArabic},
-    {865, kCFStringEncodingDOSNordic},
-    {866, kCFStringEncodingDOSRussian},
-    {869, kCFStringEncodingDOSGreek2},
-    {874, kCFStringEncodingDOSThai},
-    {932, kCFStringEncodingDOSJapanese},
-    {936, kCFStringEncodingDOSChineseSimplif},
-    {949, kCFStringEncodingDOSKorean},
-    {950, kCFStringEncodingDOSChineseTrad},
-    {1250, kCFStringEncodingWindowsLatin2},
-    {1251, kCFStringEncodingWindowsCyrillic},
-    {1252, kCFStringEncodingWindowsLatin1},
-    {1253, kCFStringEncodingWindowsGreek},
-    {1254, kCFStringEncodingWindowsLatin5},
-    {1255, kCFStringEncodingWindowsHebrew},
-    {1256, kCFStringEncodingWindowsArabic},
-    {1257, kCFStringEncodingWindowsBalticRim},
-    {1258, kCFStringEncodingWindowsVietnamese},
-    {1361, kCFStringEncodingWindowsKoreanJohab},
-    {20127, kCFStringEncodingASCII},
-    {20866, kCFStringEncodingKOI8_R},
-    {21866, kCFStringEncodingKOI8_U},
-    {50220, kCFStringEncodingISO_2022_JP},
-    {50225, kCFStringEncodingISO_2022_KR},
-    {50227, kCFStringEncodingISO_2022_CN},
-    {51932, kCFStringEncodingEUC_JP},
-    {51936, kCFStringEncodingEUC_CN},
-    {51949, kCFStringEncodingEUC_KR},
-    {51950, kCFStringEncodingEUC_TW},
-    {52936, kCFStringEncodingHZ_GB_2312},
-    {54936, kCFStringEncodingGB_18030_2000},
-};
-
-/* Looks up the CFStringEncoding registered for Windows codepage `target`.
- *
- * Only an exact acp match yields an encoding; anything else returns
- * kCFStringEncodingInvalidId. (The previous version returned the encoding of
- * the nearest lower codepage for a non-matching target and could read one
- * entry past the end of the table.) The parameter is a full UInt32 so that
- * codepages above 0xFFFF are rejected rather than truncated to 16 bits.
- */
-static SInt32 bsearchEncoding(UInt32 target) {
-    SInt32 low = 0;
-    SInt32 high = (SInt32)(sizeof(_CFACPToCFTable) / sizeof(_CFACPToCFTable[0])) - 1;
-
-    while (low <= high) {
-        SInt32 middle = low + ((high - low) / 2);
-        UInt32 candidate = _CFACPToCFTable[middle].acp;
-
-        if (candidate == target) return _CFACPToCFTable[middle].encoding;
-
-        if (candidate < target) low = middle + 1;
-        else high = middle - 1;
-    }
-    return (kCFStringEncodingInvalidId);
-}
-#endif
-
-/* Converts a Windows codepage number to a CFStringEncoding.
- *
- * 0 and 1 stand for the default codepage and yield the system encoding.
- * Mac script (10000-based), ISO 8859 (28590-based) and Unicode codepages are
- * computed directly; on __MACH__ builds the remaining 16-bit codepages are
- * looked up with bsearchEncoding(). Returns kCFStringEncodingInvalidId for
- * anything unrecognized.
- *
- * UTF-16 is matched as 1200/1201, the values that
- * CFStringConvertEncodingToWindowsCodepage() produces. The earlier 12000/12001
- * tests could never succeed because those numbers are captured by the
- * 10000..19999 Mac script branch first.
- */
-CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
-    if (theEncoding == 0 || theEncoding == 1) { // ID for default (system) codepage
-        return CFStringGetSystemEncoding();
-    } else if ((theEncoding >= MACCODEPAGE_BASE) && (theEncoding < 20000)) { // Mac script
-        if (theEncoding <= 10029) return theEncoding - MACCODEPAGE_BASE; // up to Mac Central European
-#if defined(__MACH__)
-        else if (theEncoding == 10079) return kCFStringEncodingMacIcelandic;
-        else if (theEncoding == 10081) return kCFStringEncodingMacTurkish;
-        else if (theEncoding == 10082) return kCFStringEncodingMacCroatian;
-#endif
-    } else if ((theEncoding >= ISO8859CODEPAGE_BASE) && (theEncoding <= 28605)) { // ISO 8859
-        return (theEncoding - ISO8859CODEPAGE_BASE) + 0x200;
-    } else if (theEncoding == 65001) { // UTF-8
-        return kCFStringEncodingUTF8;
-    } else if (theEncoding == 1200) { // UTF-16
-        return kCFStringEncodingUTF16;
-    } else if (theEncoding == 1201) { // UTF-16BE
-        return kCFStringEncodingUTF16BE;
-    } else if (theEncoding == 65005) { // UTF-32
-        return kCFStringEncodingUTF32;
-    } else if (theEncoding == 65006) { // UTF-32BE
-        return kCFStringEncodingUTF32BE;
-    } else if (theEncoding <= 0xFFFF) {
-        // The lookup table stores 16-bit codepages; larger values cannot be
-        // in it and must not be truncated into a false match
-#if defined(__MACH__)
-        return bsearchEncoding(theEncoding);
-#endif
-    }
-
-    return kCFStringEncodingInvalidId;
-}
-
-/* Returns whatever CFStringEncodingGetScriptCodeForEncoding() reports for
- * encoding; this function adds no logic of its own.
- */
-CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
-    return CFStringEncodingGetScriptCodeForEncoding(encoding);
-}
-
-