X-Git-Url: https://git.saurik.com/apple/cf.git/blobdiff_plain/47a9ab1f151d80a00a045f81937ddac81c51a463..bd5b749cf7786ae858ab372fc8f64179736c6515:/String.subproj/CFStringUtilities.c diff --git a/String.subproj/CFStringUtilities.c b/String.subproj/CFStringUtilities.c deleted file mode 100644 index 27bea85..0000000 --- a/String.subproj/CFStringUtilities.c +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* CFStringUtilities.c - Copyright 1999-2002, Apple, Inc. All rights reserved. - Responsibility: Aki Inoue -*/ - -#include "CFInternal.h" -#include "CFStringEncodingConverterExt.h" -#include "CFUniChar.h" -#include -#include -#if defined(__MACH__) || defined(__LINUX__) -#include -#elif defined(__WIN32__) -#include -#include -#endif - - -Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) { - switch (theEncoding) { - case kCFStringEncodingASCII: // Built-in encodings - case kCFStringEncodingMacRoman: - case kCFStringEncodingUTF8: - case kCFStringEncodingNonLossyASCII: - case kCFStringEncodingWindowsLatin1: - case kCFStringEncodingNextStepLatin: - case kCFStringEncodingUTF16: - case kCFStringEncodingUTF16BE: - case kCFStringEncodingUTF16LE: - case kCFStringEncodingUTF32: - case kCFStringEncodingUTF32BE: - case kCFStringEncodingUTF32LE: - return true; - - default: - return CFStringEncodingIsValidEncoding(theEncoding); - } -} - -const CFStringEncoding* CFStringGetListOfAvailableEncodings() { - return CFStringEncodingListOfAvailableEncodings(); -} - -CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) { - static CFMutableDictionaryRef mappingTable = NULL; - CFStringRef theName = mappingTable ? CFDictionaryGetValue(mappingTable, (const void*)theEncoding) : NULL; - - if (!theName) { - switch (theEncoding) { - case kCFStringEncodingUTF8: theName = CFSTR("Unicode (UTF-8)"); break; - case kCFStringEncodingUTF16: theName = CFSTR("Unicode (UTF-16)"); break; - case kCFStringEncodingUTF16BE: theName = CFSTR("Unicode (UTF-16BE)"); break; - case kCFStringEncodingUTF16LE: theName = CFSTR("Unicode (UTF-16LE)"); break; - case kCFStringEncodingUTF32: theName = CFSTR("Unicode (UTF-32)"); break; - case kCFStringEncodingUTF32BE: theName = CFSTR("Unicode (UTF-32BE)"); break; - case kCFStringEncodingUTF32LE: theName = CFSTR("Unicode (UTF-32LE)"); break; - case kCFStringEncodingNonLossyASCII: theName = CFSTR("Non-lossy ASCII"); break; - - default: { - const uint8_t *encodingName = CFStringEncodingName(theEncoding); - - if (encodingName) { - theName = CFStringCreateWithCString(NULL, encodingName, kCFStringEncodingASCII); - } - } - break; - } - - if (theName) { - if (!mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks); - - CFDictionaryAddValue(mappingTable, (const void*)theEncoding, (const void*)theName); - CFRelease(theName); - } - } - - return theName; -} - -CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) { - static CFMutableDictionaryRef mappingTable = NULL; - CFStringEncoding result = kCFStringEncodingInvalidId; - CFMutableStringRef lowerCharsetName; - - /* Check for common encodings first */ - if (CFStringCompare(charsetName, CFSTR("utf-8"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) { - return kCFStringEncodingUTF8; - } else if (CFStringCompare(charsetName, CFSTR("iso-8859-1"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) { - return kCFStringEncodingISOLatin1; - } - - /* Create lowercase copy */ - lowerCharsetName = CFStringCreateMutableCopy(NULL, 0, charsetName); - CFStringLowercase(lowerCharsetName, NULL); - - if (mappingTable == NULL) { - CFMutableDictionaryRef table = CFDictionaryCreateMutable(NULL, 0, &kCFTypeDictionaryKeyCallBacks, (const CFDictionaryValueCallBacks *)NULL); - const CFStringEncoding *encodings = CFStringGetListOfAvailableEncodings(); - - while (*encodings != kCFStringEncodingInvalidId) { - const char **nameList = CFStringEncodingCanonicalCharsetNames(*encodings); - - if (nameList) { - while (*nameList) { - CFStringRef name = CFStringCreateWithCString(NULL, *nameList++, kCFStringEncodingASCII); - - if (name) { - CFDictionaryAddValue(table, (const void*)name, (const void*)*encodings); - CFRelease(name); - } - } - } - encodings++; - } - // Adding Unicode names - CFDictionaryAddValue(table, (const void*)CFSTR("unicode-1-1"), (const void*)kCFStringEncodingUTF16); - CFDictionaryAddValue(table, (const void*)CFSTR("iso-10646-ucs-2"), (const void*)kCFStringEncodingUTF16); - CFDictionaryAddValue(table, (const void*)CFSTR("utf-16"), (const void*)kCFStringEncodingUTF16); - CFDictionaryAddValue(table, (const void*)CFSTR("utf-16be"), (const void*)kCFStringEncodingUTF16BE); - CFDictionaryAddValue(table, (const void*)CFSTR("utf-16le"), (const void*)kCFStringEncodingUTF16LE); - CFDictionaryAddValue(table, (const void*)CFSTR("utf-32"), (const void*)kCFStringEncodingUTF32); - CFDictionaryAddValue(table, (const void*)CFSTR("utf-32be"), (const void*)kCFStringEncodingUTF32BE); - CFDictionaryAddValue(table, (const void*)CFSTR("utf-32le"), (const void*)kCFStringEncodingUTF32LE); - - mappingTable = table; - } - - if (CFDictionaryContainsKey(mappingTable, (const void*)lowerCharsetName)) { - result = (CFStringEncoding)CFDictionaryGetValue(mappingTable, (const void*)lowerCharsetName); - } - - CFRelease(lowerCharsetName); - - return result; -} - -CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) { - static CFMutableDictionaryRef mappingTable = NULL; - CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)encoding) : NULL; - - if (!theName) { - switch (encoding) { - case kCFStringEncodingUTF16: theName = CFSTR("UTF-16"); break; - case kCFStringEncodingUTF16BE: theName = CFSTR("UTF-16BE"); break; - case kCFStringEncodingUTF16LE: theName = CFSTR("UTF-16LE"); break; - case kCFStringEncodingUTF32: theName = CFSTR("UTF-32"); break; - case kCFStringEncodingUTF32BE: theName = CFSTR("UTF-32BE"); break; - case kCFStringEncodingUTF32LE: theName = CFSTR("UTF-32LE"); break; - - - default: { - const char **nameList = CFStringEncodingCanonicalCharsetNames(encoding); - - if (nameList && *nameList) { - CFMutableStringRef upperCaseName; - - theName = CFStringCreateWithCString(NULL, *nameList, kCFStringEncodingASCII); - if (theName) { - upperCaseName = CFStringCreateMutableCopy(NULL, 0, theName); - CFStringUppercase(upperCaseName, 0); - CFRelease(theName); - theName = upperCaseName; - } - } - } - break; - } - - if (theName) { - if (!mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks); - - CFDictionaryAddValue(mappingTable, (const void*)encoding, (const void*)theName); - CFRelease(theName); - } - } - - return theName; -} - -enum { - NSASCIIStringEncoding = 1, /* 0..127 only */ - NSNEXTSTEPStringEncoding = 2, - NSJapaneseEUCStringEncoding = 3, - NSUTF8StringEncoding = 4, - NSISOLatin1StringEncoding = 5, - NSSymbolStringEncoding = 6, - NSNonLossyASCIIStringEncoding = 7, - NSShiftJISStringEncoding = 8, - NSISOLatin2StringEncoding = 9, - NSUnicodeStringEncoding = 10, - NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */ - NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */ - NSWindowsCP1253StringEncoding = 13, /* Greek */ - NSWindowsCP1254StringEncoding = 14, /* Turkish */ - NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */ - NSISO2022JPStringEncoding = 21, /* ISO 2022 Japanese encoding for e-mail */ - NSMacOSRomanStringEncoding = 30, - - NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */ -}; - -#define NSENCODING_MASK (1 << 31) - -UInt32 CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) { - switch (theEncoding & 0xFFF) { - case kCFStringEncodingASCII: return NSASCIIStringEncoding; - case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding; - case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding; - case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding; - case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding; - case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding; -#if defined(__MACH__) - case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding; - case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding; - case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding; - case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding; - case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding; - case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding; - case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding; - case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding; - case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding; - case kCFStringEncodingUnicode: - if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding; - else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding; -#endif // __MACH__ - /* fall-through for other encoding schemes */ - - default: - return NSENCODING_MASK | theEncoding; - } -} - -CFStringEncoding CFStringConvertNSStringEncodingToEncoding(UInt32 theEncoding) { - switch (theEncoding) { - case NSASCIIStringEncoding: return kCFStringEncodingASCII; - case NSNEXTSTEPStringEncoding: return kCFStringEncodingNextStepLatin; - case NSUTF8StringEncoding: return kCFStringEncodingUTF8; - case NSISOLatin1StringEncoding: return kCFStringEncodingISOLatin1; - case NSNonLossyASCIIStringEncoding: return kCFStringEncodingNonLossyASCII; - case NSUnicodeStringEncoding: return kCFStringEncodingUTF16; - case NSWindowsCP1252StringEncoding: return kCFStringEncodingWindowsLatin1; - case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman; -#if defined(__MACH__) - case NSSymbolStringEncoding: return kCFStringEncodingMacSymbol; - case NSJapaneseEUCStringEncoding: return kCFStringEncodingEUC_JP; - case NSShiftJISStringEncoding: return kCFStringEncodingDOSJapanese; - case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP; - case NSISOLatin2StringEncoding: return kCFStringEncodingISOLatin2; - case NSWindowsCP1251StringEncoding: return kCFStringEncodingWindowsCyrillic; - case NSWindowsCP1253StringEncoding: return kCFStringEncodingWindowsGreek; - case NSWindowsCP1254StringEncoding: return kCFStringEncodingWindowsLatin5; - case NSWindowsCP1250StringEncoding: return kCFStringEncodingWindowsLatin2; -#endif // __MACH__ - default: - return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId); - } -} - -#define MACCODEPAGE_BASE (10000) -#define ISO8859CODEPAGE_BASE (28590) - -static const uint16_t _CFToDOSCodePageList[] = { - 437, -1, -1, -1, -1, 737, 775, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x400 - 850, 851, 852, 855, 857, 860, 861, 862, 863, 864, 865, 866, 869, 874, -1, 01, // 0x410 - 932, 936, 949 , 950, // 0x420 -}; - -static const uint16_t _CFToWindowsCodePageList[] = { - 1252, 1250, 1251, 1253, 1254, 1255, 1256, 1257, 1258, -}; - -static const uint16_t _CFEUCToCodePage[] = { // 0x900 - 51932, 51936, 51950, 51949, -}; - -UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) { -#if defined(__MACH__) - CFStringEncoding encodingBase = theEncoding & 0x0FFF; -#endif - - switch (theEncoding & 0x0F00) { -#if defined(__MACH__) - case 0: // Mac OS script - if (encodingBase <= kCFStringEncodingMacCentralEurRoman) { - return MACCODEPAGE_BASE + encodingBase; - } else if (encodingBase == kCFStringEncodingMacTurkish) { - return 10081; - } else if (encodingBase == kCFStringEncodingMacCroatian) { - return 10082; - } else if (encodingBase == kCFStringEncodingMacIcelandic) { - return 10079; - } - break; -#endif - - case 0x100: // Unicode - switch (theEncoding) { - case kCFStringEncodingUTF8: return 65001; - case kCFStringEncodingUTF16: return 1200; - case kCFStringEncodingUTF16BE: return 1201; - case kCFStringEncodingUTF32: return 65005; - case kCFStringEncodingUTF32BE: return 65006; - } - break; - -#if defined(__MACH__) - case 0x0200: // ISO 8859 series - if (encodingBase <= kCFStringEncodingISOLatin10) return ISO8859CODEPAGE_BASE + (encodingBase - 0x200); - break; - - case 0x0400: // DOS codepage - if (encodingBase <= kCFStringEncodingDOSChineseTrad) return _CFToDOSCodePageList[encodingBase - 0x400]; - break; - - case 0x0500: // ANSI (Windows) codepage - if (encodingBase <= kCFStringEncodingWindowsVietnamese) return _CFToWindowsCodePageList[theEncoding - 0x500]; - else if (encodingBase == kCFStringEncodingWindowsKoreanJohab) return 1361; - break; - - case 0x600: // National standards - if (encodingBase == kCFStringEncodingASCII) return 20127; - else if (encodingBase == kCFStringEncodingGB_18030_2000) return 54936; - break; - - case 0x0800: // ISO 2022 series - switch (encodingBase) { - case kCFStringEncodingISO_2022_JP: return 50220; - case kCFStringEncodingISO_2022_CN: return 50227; - case kCFStringEncodingISO_2022_KR: return 50225; - } - break; - - case 0x0900: // EUC series - if (encodingBase <= kCFStringEncodingEUC_KR) return _CFEUCToCodePage[encodingBase - 0x0900]; - break; - - - case 0x0A00: // Misc encodings - switch (encodingBase) { - case kCFStringEncodingKOI8_R: return 20866; - case kCFStringEncodingHZ_GB_2312: return 52936; - case kCFStringEncodingKOI8_U: return 21866; - } - break; - - case 0x0C00: // IBM EBCDIC encodings - if (encodingBase == kCFStringEncodingEBCDIC_CP037) return 37; - break; -#endif // __MACH__ - } - - return kCFStringEncodingInvalidId; -} - -#if defined(__MACH__) -static const struct { - uint16_t acp; - uint16_t encoding; -} _CFACPToCFTable[] = { - {37, kCFStringEncodingEBCDIC_CP037}, - {437, kCFStringEncodingDOSLatinUS}, - {737, kCFStringEncodingDOSGreek}, - {775, kCFStringEncodingDOSBalticRim}, - {850, kCFStringEncodingDOSLatin1}, - {851, kCFStringEncodingDOSGreek1}, - {852, kCFStringEncodingDOSLatin2}, - {855, kCFStringEncodingDOSCyrillic}, - {857, kCFStringEncodingDOSTurkish}, - {860, kCFStringEncodingDOSPortuguese}, - {861, kCFStringEncodingDOSIcelandic}, - {862, kCFStringEncodingDOSHebrew}, - {863, kCFStringEncodingDOSCanadianFrench}, - {864, kCFStringEncodingDOSArabic}, - {865, kCFStringEncodingDOSNordic}, - {866, kCFStringEncodingDOSRussian}, - {869, kCFStringEncodingDOSGreek2}, - {874, kCFStringEncodingDOSThai}, - {932, kCFStringEncodingDOSJapanese}, - {936, kCFStringEncodingDOSChineseSimplif}, - {949, kCFStringEncodingDOSKorean}, - {950, kCFStringEncodingDOSChineseTrad}, - {1250, kCFStringEncodingWindowsLatin2}, - {1251, kCFStringEncodingWindowsCyrillic}, - {1252, kCFStringEncodingWindowsLatin1}, - {1253, kCFStringEncodingWindowsGreek}, - {1254, kCFStringEncodingWindowsLatin5}, - {1255, kCFStringEncodingWindowsHebrew}, - {1256, kCFStringEncodingWindowsArabic}, - {1257, kCFStringEncodingWindowsBalticRim}, - {1258, kCFStringEncodingWindowsVietnamese}, - {1361, kCFStringEncodingWindowsKoreanJohab}, - {20127, kCFStringEncodingASCII}, - {20866, kCFStringEncodingKOI8_R}, - {21866, kCFStringEncodingKOI8_U}, - {50220, kCFStringEncodingISO_2022_JP}, - {50225, kCFStringEncodingISO_2022_KR}, - {50227, kCFStringEncodingISO_2022_CN}, - {51932, kCFStringEncodingEUC_JP}, - {51936, kCFStringEncodingEUC_CN}, - {51949, kCFStringEncodingEUC_KR}, - {51950, kCFStringEncodingEUC_TW}, - {52936, kCFStringEncodingHZ_GB_2312}, - {54936, kCFStringEncodingGB_18030_2000}, -}; - -static SInt32 bsearchEncoding(uint16_t target) { - const unsigned int *start, *end, *divider; - unsigned int size = sizeof(_CFACPToCFTable) / sizeof(UInt32); - - start = (const unsigned int*)_CFACPToCFTable; end = (const unsigned int*)_CFACPToCFTable + (size - 1); - while (start <= end) { - divider = start + ((end - start) / 2); - - if (*(const uint16_t*)divider == target) return *((const uint16_t*)divider + 1); - else if (*(const uint16_t*)divider > target) end = divider - 1; - else if (*(const uint16_t*)(divider + 1) > target) return *((const uint16_t*)divider + 1); - else start = divider + 1; - } - return (kCFStringEncodingInvalidId); -} -#endif - -CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) { - if (theEncoding == 0 || theEncoding == 1) { // ID for default (system) codepage - return CFStringGetSystemEncoding(); - } else if ((theEncoding >= MACCODEPAGE_BASE) && (theEncoding < 20000)) { // Mac script - if (theEncoding <= 10029) return theEncoding - MACCODEPAGE_BASE; // up to Mac Central European -#if defined(__MACH__) - else if (theEncoding == 10079) return kCFStringEncodingMacIcelandic; - else if (theEncoding == 10081) return kCFStringEncodingMacTurkish; - else if (theEncoding == 10082) return kCFStringEncodingMacCroatian; -#endif - } else if ((theEncoding >= ISO8859CODEPAGE_BASE) && (theEncoding <= 28605)) { // ISO 8859 - return (theEncoding - ISO8859CODEPAGE_BASE) + 0x200; - } else if (theEncoding == 65001) { // UTF-8 - return kCFStringEncodingUTF8; - } else if (theEncoding == 12000) { // UTF-16 - return kCFStringEncodingUTF16; - } else if (theEncoding == 12001) { // UTF-16BE - return kCFStringEncodingUTF16BE; - } else if (theEncoding == 65005) { // UTF-32 - return kCFStringEncodingUTF32; - } else if (theEncoding == 65006) { // UTF-32BE - return kCFStringEncodingUTF32BE; - } else { -#if defined(__MACH__) - return bsearchEncoding(theEncoding); -#endif - } - - return kCFStringEncodingInvalidId; -} - -CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) { - CFStringEncoding macEncoding; - - macEncoding = CFStringEncodingGetScriptCodeForEncoding(encoding); - - return macEncoding; -} - -