2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* CFStringUtilities.c
26 Copyright 1999-2002, Apple, Inc. All rights reserved.
27 Responsibility: Aki Inoue
30 #include "CFInternal.h"
31 #include "CFStringEncodingConverterExt.h"
32 #include "CFUniChar.h"
34 #if defined(__MACH__) || defined(__LINUX__)
36 #elif defined(__WIN32__)
/*
 * Reports whether theEncoding is available.
 *
 * The case labels enumerate the encodings the original comment calls
 * "built-in"; CFStringEncodingIsValidEncoding() is the other source of the
 * answer.
 *
 * NOTE(review): the statement that follows the built-in case labels and the
 * label in front of the CFStringEncodingIsValidEncoding() call are not
 * visible in this excerpt -- presumably the built-in cases return true and
 * the converter query is the default branch; confirm against the full file.
 */
43 Boolean
CFStringIsEncodingAvailable(CFStringEncoding theEncoding
) {
44 switch (theEncoding
) {
45 case kCFStringEncodingASCII
: // Built-in encodings
46 case kCFStringEncodingMacRoman
:
47 case kCFStringEncodingUnicode
:
48 case kCFStringEncodingUTF8
:
49 case kCFStringEncodingNonLossyASCII
:
50 case kCFStringEncodingWindowsLatin1
:
51 case kCFStringEncodingNextStepLatin
:
55 return CFStringEncodingIsValidEncoding(theEncoding
);
59 const CFStringEncoding
* CFStringGetListOfAvailableEncodings() {
60 return CFStringEncodingListOfAvailableEncodings();
/*
 * Returns a display name for theEncoding.
 *
 * Names are cached in a function-static dictionary keyed by the encoding
 * value itself (cast to a pointer); the cache is consulted first when it
 * already exists.  Unicode, UTF-8 and non-lossy ASCII use fixed CFSTR
 * literals.  A name obtained from CFStringEncodingName() is turned into a
 * CFString with CFStringCreateWithCString() using ASCII.
 *
 * The dictionary is created on first use with NULL key callbacks and
 * kCFTypeDictionaryValueCallBacks for the values.
 *
 * NOTE(review): the branch structure joining these statements, any release
 * of the created string, and the return statement are not visible in this
 * excerpt -- confirm them against the full file.  No locking around the
 * static table is visible here either.
 */
63 CFStringRef
CFStringGetNameOfEncoding(CFStringEncoding theEncoding
) {
64 static CFMutableDictionaryRef mappingTable
= NULL
;
65 CFStringRef theName
= mappingTable
? CFDictionaryGetValue(mappingTable
, (const void*)theEncoding
) : NULL
;
68 if (theEncoding
== kCFStringEncodingUnicode
) {
69 theName
= CFSTR("Unicode (UTF-16)");
70 } else if (theEncoding
== kCFStringEncodingUTF8
) {
71 theName
= CFSTR("Unicode (UTF-8)");
72 } else if (theEncoding
== kCFStringEncodingNonLossyASCII
) {
73 theName
= CFSTR("Non-lossy ASCII");
75 const uint8_t *encodingName
= CFStringEncodingName(theEncoding
);
78 theName
= CFStringCreateWithCString(NULL
, encodingName
, kCFStringEncodingASCII
);
82 if (!mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
84 CFDictionaryAddValue(mappingTable
, (const void*)theEncoding
, (const void*)theName
);
/*
 * Maps an IANA charset name to a CFStringEncoding.
 *
 * A lowercased mutable copy of charsetName is made first.  Three common
 * names ("utf-8", "iso-8859-1", "utf-16be") are answered directly; the copy
 * is released before each of those returns.
 *
 * Other names are looked up in a function-static dictionary that is built on
 * first use: for every encoding returned by
 * CFStringGetListOfAvailableEncodings() the names from
 * CFStringEncodingCanonicalCharsetNames() are added as keys with the encoding
 * value (cast to a pointer) as the value, followed by three extra aliases for
 * kCFStringEncodingUnicode.  result starts as kCFStringEncodingInvalidId and
 * is overwritten when the dictionary contains the lowercased name.
 *
 * NOTE(review): the fast-path comparisons pass kCFCompareCaseInsensitive even
 * though the string has already been lowercased, so the flag looks redundant.
 * NOTE(review): the loop structure around the name list, any release of the
 * per-name CFString, and the final return are not visible in this excerpt --
 * confirm against the full file.
 */
93 CFStringEncoding
CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName
) {
94 static CFMutableDictionaryRef mappingTable
= NULL
;
95 CFStringEncoding result
= kCFStringEncodingInvalidId
;
96 CFMutableStringRef lowerCharsetName
= CFStringCreateMutableCopy(NULL
, 0, charsetName
);
98 /* Create lowercase copy */
99 CFStringLowercase(lowerCharsetName
, NULL
);
101 /* Check for common encodings first */
102 if (CFStringCompare(lowerCharsetName
, CFSTR("utf-8"), kCFCompareCaseInsensitive
) == kCFCompareEqualTo
) {
103 CFRelease(lowerCharsetName
);
104 return kCFStringEncodingUTF8
;
105 } else if (CFStringCompare(lowerCharsetName
, CFSTR("iso-8859-1"), kCFCompareCaseInsensitive
) == kCFCompareEqualTo
) {
106 CFRelease(lowerCharsetName
);
107 return kCFStringEncodingISOLatin1
;
108 } else if (CFStringCompare(lowerCharsetName
, CFSTR("utf-16be"), kCFCompareCaseInsensitive
) == kCFCompareEqualTo
) {
109 CFRelease(lowerCharsetName
);
110 return kCFStringEncodingUnicode
;
113 if (mappingTable
== NULL
) {
114 CFMutableDictionaryRef table
= CFDictionaryCreateMutable(NULL
, 0, &kCFTypeDictionaryKeyCallBacks
, (const CFDictionaryValueCallBacks
*)NULL
);
115 const CFStringEncoding
*encodings
= CFStringGetListOfAvailableEncodings();
117 while (*encodings
!= kCFStringEncodingInvalidId
) {
118 const char **nameList
= CFStringEncodingCanonicalCharsetNames(*encodings
);
122 CFStringRef name
= CFStringCreateWithCString(NULL
, *nameList
++, kCFStringEncodingASCII
);
125 CFDictionaryAddValue(table
, (const void*)name
, (const void*)*encodings
);
132 // Adding Unicode (UCS-2) names
133 CFDictionaryAddValue(table
, (const void*)CFSTR("unicode-1-1"), (const void*)kCFStringEncodingUnicode
);
134 CFDictionaryAddValue(table
, (const void*)CFSTR("utf-16"), (const void*)kCFStringEncodingUnicode
);
135 CFDictionaryAddValue(table
, (const void*)CFSTR("iso-10646-ucs-2"), (const void*)kCFStringEncodingUnicode
);
137 mappingTable
= table
;
140 if (CFDictionaryContainsKey(mappingTable
, (const void*)lowerCharsetName
)) {
141 result
= (CFStringEncoding
)CFDictionaryGetValue(mappingTable
, (const void*)lowerCharsetName
);
144 CFRelease(lowerCharsetName
);
/*
 * Returns an IANA charset name for encoding.
 *
 * Results are cached in a function-static dictionary keyed by the encoding
 * value (cast to a pointer); the cache is consulted first when it exists.
 * kCFStringEncodingUnicode is named "UTF-16BE".  For other encodings the
 * first entry of CFStringEncodingCanonicalCharsetNames() is converted to a
 * CFString, copied into a mutable string, upper-cased, and used as the name.
 *
 * The dictionary is created on first use with NULL key callbacks and
 * kCFTypeDictionaryValueCallBacks for the values.
 *
 * NOTE(review): theName is reassigned to upperCaseName after the string from
 * CFStringCreateWithCString() was stored in it; no release of that first
 * string is visible in this excerpt -- confirm it is not leaked.
 * NOTE(review): the surrounding branch structure and the return statement
 * are not visible here either.
 */
149 CFStringRef
CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding
) {
150 static CFMutableDictionaryRef mappingTable
= NULL
;
151 CFStringRef theName
= mappingTable
? (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)encoding
) : NULL
;
154 if (encoding
== kCFStringEncodingUnicode
) {
155 theName
= CFSTR("UTF-16BE");
157 const char **nameList
= CFStringEncodingCanonicalCharsetNames(encoding
);
159 if (nameList
&& *nameList
) {
160 CFMutableStringRef upperCaseName
;
162 theName
= CFStringCreateWithCString(NULL
, *nameList
, kCFStringEncodingASCII
);
164 upperCaseName
= CFStringCreateMutableCopy(NULL
, 0, theName
);
165 CFStringUppercase(upperCaseName
, 0);
167 theName
= upperCaseName
;
173 if (!mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
175 CFDictionaryAddValue(mappingTable
, (const void*)encoding
, (const void*)theName
);
/*
 * NSStringEncoding values used by the two NSStringEncoding conversion
 * functions in this file.  Only some of them have a dedicated
 * CFStringEncoding counterpart in those functions' switch statements.
 * NOTE(review): the declaration that opens and closes this enumerator list
 * is not visible in this excerpt.
 */
184 NSASCIIStringEncoding
= 1, /* 0..127 only */
185 NSNEXTSTEPStringEncoding
= 2,
186 NSJapaneseEUCStringEncoding
= 3,
187 NSUTF8StringEncoding
= 4,
188 NSISOLatin1StringEncoding
= 5,
189 NSSymbolStringEncoding
= 6,
190 NSNonLossyASCIIStringEncoding
= 7,
191 NSShiftJISStringEncoding
= 8,
192 NSISOLatin2StringEncoding
= 9,
193 NSUnicodeStringEncoding
= 10,
194 NSWindowsCP1251StringEncoding
= 11, /* Cyrillic; same as AdobeStandardCyrillic */
195 NSWindowsCP1252StringEncoding
= 12, /* WinLatin1 */
196 NSWindowsCP1253StringEncoding
= 13, /* Greek */
197 NSWindowsCP1254StringEncoding
= 14, /* Turkish */
198 NSWindowsCP1250StringEncoding
= 15, /* WinLatin2 */
199 NSISO2022JPStringEncoding
= 21, /* ISO 2022 Japanese encoding for e-mail */
200 NSMacOSRomanStringEncoding
= 30,
202 NSProprietaryStringEncoding
= 65536 /* Installation-specific encoding */
/*
 * Top bit of an NSStringEncoding value.  It is OR-ed onto a masked
 * CFStringEncoding that has no dedicated NSStringEncoding constant, and it is
 * tested and stripped again when converting back.
 * The operand is unsigned: shifting a signed 1 into bit 31 is undefined
 * behaviour for a 32-bit int.
 */
#define NSENCODING_MASK (1U << 31)
207 UInt32
CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding
) {
208 if (theEncoding
== kCFStringEncodingUTF8
) {
209 return NSUTF8StringEncoding
;
211 theEncoding
&= 0xFFF;
213 switch (theEncoding
) {
214 case kCFStringEncodingASCII
: return NSASCIIStringEncoding
;
215 case kCFStringEncodingNextStepLatin
: return NSNEXTSTEPStringEncoding
;
216 case kCFStringEncodingISOLatin1
: return NSISOLatin1StringEncoding
;
217 case kCFStringEncodingNonLossyASCII
: return NSNonLossyASCIIStringEncoding
;
218 case kCFStringEncodingUnicode
: return NSUnicodeStringEncoding
;
219 case kCFStringEncodingWindowsLatin1
: return NSWindowsCP1252StringEncoding
;
220 case kCFStringEncodingMacRoman
: return NSMacOSRomanStringEncoding
;
222 return NSENCODING_MASK
| theEncoding
;
226 CFStringEncoding
CFStringConvertNSStringEncodingToEncoding(UInt32 theEncoding
) {
227 switch (theEncoding
) {
228 case NSASCIIStringEncoding
: return kCFStringEncodingASCII
;
229 case NSNEXTSTEPStringEncoding
: return kCFStringEncodingNextStepLatin
;
230 case NSUTF8StringEncoding
: return kCFStringEncodingUTF8
;
231 case NSISOLatin1StringEncoding
: return kCFStringEncodingISOLatin1
;
232 case NSNonLossyASCIIStringEncoding
: return kCFStringEncodingNonLossyASCII
;
233 case NSUnicodeStringEncoding
: return kCFStringEncodingUnicode
;
234 case NSWindowsCP1252StringEncoding
: return kCFStringEncodingWindowsLatin1
;
235 case NSMacOSRomanStringEncoding
: return kCFStringEncodingMacRoman
;
237 return ((theEncoding
& NSENCODING_MASK
) ? theEncoding
& ~NSENCODING_MASK
: kCFStringEncodingInvalidId
);
/* Code page numbers from this base up to (but not including) 20000 are
 * treated as Mac script codes by CFStringConvertWindowsCodepageToEncoding(),
 * which subtracts the base. */
241 #define MACCODEPAGE_BASE (10000)
/* The ten code page numbers starting at this base are translated by
 * CFStringConvertWindowsCodepageToEncoding() to 0x200 plus the offset from
 * the base (its "ISO8859 range"). */
242 #define ISO8859CODEPAGE_BASE (28590)
/*
 * DOS code page numbers, sixteen slots per row; the trailing comment on each
 * row gives the value associated with the row's first slot.
 * Slots without a code page hold -1, which is stored as 0xFFFF in uint16_t.
 * The last slot of the 0x410 row previously held `01` (octal 1) instead of
 * the -1 placeholder used everywhere else.
 */
static const uint16_t _CFToDOSCodePageList[] = {
    437, -1, -1, -1, -1, 737, 775, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x400
    850, 851, 852, 855, 857, 860, 861, 862, 863, 864, 865, 866, 869, 874, -1, -1, // 0x410
    932, 936, 949, 950, // 0x420
};
/* Windows (ANSI) code page numbers, in the order listed. */
static const uint16_t _CFToWindowsCodePageList[] = {
    1252, /* WinLatin1 */
    1250, /* WinLatin2 */
    1251, /* Cyrillic */
    1253, /* Greek */
    1254, /* Turkish */
    1255,
    1256,
    1257,
    1361,
};
/*
 * Converts a CFStringEncoding to a Windows code page number.
 *
 * Visible logic: kCFStringEncodingUTF8 is tested first, other values are
 * masked to their low 12 bits, and kCFStringEncodingInvalidId is the
 * fall-back return value.
 *
 * NOTE(review): the statement executed for UTF-8 is not visible in this
 * excerpt -- presumably it returns 65001, the code page that
 * CFStringConvertWindowsCodepageToEncoding() maps to UTF-8; confirm.
 */
254 UInt32
CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding
) {
255 if (theEncoding
== kCFStringEncodingUTF8
) {
258 theEncoding
&= 0xFFF;
260 return kCFStringEncodingInvalidId
;
/*
 * Lookup table searched by bsearchEncoding().  That function walks it in
 * unsigned-int-sized steps, compares the unsigned short at the start of each
 * step with the requested code page, and returns the unsigned short that
 * follows it.
 * NOTE(review): the struct members and the table entries are not visible in
 * this excerpt; bsearchEncoding() presumably relies on the entries being
 * sorted by that leading value -- confirm.
 */
263 static const struct {
266 } _CFACPToCFTable
[] = {
300 static SInt32
bsearchEncoding(unsigned short target
) {
301 const unsigned int *start
, *end
, *divider
;
302 unsigned int size
= sizeof(_CFACPToCFTable
) / sizeof(UInt32
);
304 start
= (const unsigned int*)_CFACPToCFTable
; end
= (const unsigned int*)_CFACPToCFTable
+ (size
- 1);
305 while (start
<= end
) {
306 divider
= start
+ ((end
- start
) / 2);
308 if (*(const unsigned short*)divider
== target
) return *((const unsigned short*)divider
+ 1);
309 else if (*(const unsigned short*)divider
> target
) end
= divider
- 1;
310 else if (*(const unsigned short*)(divider
+ 1) > target
) return *((const unsigned short*)divider
+ 1);
311 else start
= divider
+ 1;
313 return (kCFStringEncodingInvalidId
);
316 CFStringEncoding
CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding
) {
317 if (theEncoding
== 0 || theEncoding
== 1) { // ID for default (system) codepage
318 return CFStringGetSystemEncoding();
319 } else if (theEncoding
< MACCODEPAGE_BASE
) { // MS CodePage
320 return bsearchEncoding(theEncoding
);
321 } else if (theEncoding
< 20000) { // MAC ScriptCode
322 return theEncoding
- MACCODEPAGE_BASE
;
323 } else if ((theEncoding
- ISO8859CODEPAGE_BASE
) < 10) { // ISO8859 range
324 return (theEncoding
- ISO8859CODEPAGE_BASE
) + 0x200;
326 switch (theEncoding
) {
327 case 65001: return kCFStringEncodingUTF8
;
328 case 20127: return kCFStringEncodingASCII
;
329 default: return kCFStringEncodingInvalidId
;
334 CFStringEncoding
CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding
) {
335 CFStringEncoding macEncoding
;
337 macEncoding
= CFStringEncodingGetScriptCodeForEncoding(encoding
);