]> git.saurik.com Git - apple/cf.git/blob - String.subproj/CFStringUtilities.c
51acaba1724fc43e7abdc940a151f012e5863f15
[apple/cf.git] / String.subproj / CFStringUtilities.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFStringUtilities.c
26 Copyright 1999-2002, Apple, Inc. All rights reserved.
27 Responsibility: Aki Inoue
28 */
29
30 #include "CFInternal.h"
31 #include "CFStringEncodingConverterExt.h"
32 #include "CFUniChar.h"
33 #include <limits.h>
34 #if defined(__MACH__) || defined(__LINUX__)
35 #include <stdlib.h>
36 #elif defined(__WIN32__)
37 #include <stdlib.h>
38 #include <tchar.h>
39 #endif
40
41
42
43 Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
44 switch (theEncoding) {
45 case kCFStringEncodingASCII: // Built-in encodings
46 case kCFStringEncodingMacRoman:
47 case kCFStringEncodingUnicode:
48 case kCFStringEncodingUTF8:
49 case kCFStringEncodingNonLossyASCII:
50 case kCFStringEncodingWindowsLatin1:
51 case kCFStringEncodingNextStepLatin:
52 return true;
53
54 default:
55 return CFStringEncodingIsValidEncoding(theEncoding);
56 }
57 }
58
59 const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
60 return CFStringEncodingListOfAvailableEncodings();
61 }
62
63 CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
64 static CFMutableDictionaryRef mappingTable = NULL;
65 CFStringRef theName = mappingTable ? CFDictionaryGetValue(mappingTable, (const void*)theEncoding) : NULL;
66
67 if (!theName) {
68 if (theEncoding == kCFStringEncodingUnicode) {
69 theName = CFSTR("Unicode (UTF-16)");
70 } else if (theEncoding == kCFStringEncodingUTF8) {
71 theName = CFSTR("Unicode (UTF-8)");
72 } else if (theEncoding == kCFStringEncodingNonLossyASCII) {
73 theName = CFSTR("Non-lossy ASCII");
74 } else {
75 const uint8_t *encodingName = CFStringEncodingName(theEncoding);
76
77 if (encodingName) {
78 theName = CFStringCreateWithCString(NULL, encodingName, kCFStringEncodingASCII);
79 }
80
81 if (theName) {
82 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
83
84 CFDictionaryAddValue(mappingTable, (const void*)theEncoding, (const void*)theName);
85 CFRelease(theName);
86 }
87 }
88 }
89
90 return theName;
91 }
92
93 CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
94 static CFMutableDictionaryRef mappingTable = NULL;
95 CFStringEncoding result = kCFStringEncodingInvalidId;
96 CFMutableStringRef lowerCharsetName = CFStringCreateMutableCopy(NULL, 0, charsetName);
97
98 /* Create lowercase copy */
99 CFStringLowercase(lowerCharsetName, NULL);
100
101 /* Check for common encodings first */
102 if (CFStringCompare(lowerCharsetName, CFSTR("utf-8"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) {
103 CFRelease(lowerCharsetName);
104 return kCFStringEncodingUTF8;
105 } else if (CFStringCompare(lowerCharsetName, CFSTR("iso-8859-1"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) {
106 CFRelease(lowerCharsetName);
107 return kCFStringEncodingISOLatin1;
108 } else if (CFStringCompare(lowerCharsetName, CFSTR("utf-16be"), kCFCompareCaseInsensitive) == kCFCompareEqualTo) {
109 CFRelease(lowerCharsetName);
110 return kCFStringEncodingUnicode;
111 }
112
113 if (mappingTable == NULL) {
114 CFMutableDictionaryRef table = CFDictionaryCreateMutable(NULL, 0, &kCFTypeDictionaryKeyCallBacks, (const CFDictionaryValueCallBacks *)NULL);
115 const CFStringEncoding *encodings = CFStringGetListOfAvailableEncodings();
116
117 while (*encodings != kCFStringEncodingInvalidId) {
118 const char **nameList = CFStringEncodingCanonicalCharsetNames(*encodings);
119
120 if (nameList) {
121 while (*nameList) {
122 CFStringRef name = CFStringCreateWithCString(NULL, *nameList++, kCFStringEncodingASCII);
123
124 if (name) {
125 CFDictionaryAddValue(table, (const void*)name, (const void*)*encodings);
126 CFRelease(name);
127 }
128 }
129 }
130 encodings++;
131 }
132 // Adding Unicode (UCS-2) names
133 CFDictionaryAddValue(table, (const void*)CFSTR("unicode-1-1"), (const void*)kCFStringEncodingUnicode);
134 CFDictionaryAddValue(table, (const void*)CFSTR("utf-16"), (const void*)kCFStringEncodingUnicode);
135 CFDictionaryAddValue(table, (const void*)CFSTR("iso-10646-ucs-2"), (const void*)kCFStringEncodingUnicode);
136
137 mappingTable = table;
138 }
139
140 if (CFDictionaryContainsKey(mappingTable, (const void*)lowerCharsetName)) {
141 result = (CFStringEncoding)CFDictionaryGetValue(mappingTable, (const void*)lowerCharsetName);
142 }
143
144 CFRelease(lowerCharsetName);
145
146 return result;
147 }
148
149 CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
150 static CFMutableDictionaryRef mappingTable = NULL;
151 CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)encoding) : NULL;
152
153 if (!theName) {
154 if (encoding == kCFStringEncodingUnicode) {
155 theName = CFSTR("UTF-16BE");
156 } else {
157 const char **nameList = CFStringEncodingCanonicalCharsetNames(encoding);
158
159 if (nameList && *nameList) {
160 CFMutableStringRef upperCaseName;
161
162 theName = CFStringCreateWithCString(NULL, *nameList, kCFStringEncodingASCII);
163 if (theName) {
164 upperCaseName = CFStringCreateMutableCopy(NULL, 0, theName);
165 CFStringUppercase(upperCaseName, 0);
166 CFRelease(theName);
167 theName = upperCaseName;
168 }
169 }
170 }
171
172 if (theName) {
173 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
174
175 CFDictionaryAddValue(mappingTable, (const void*)encoding, (const void*)theName);
176 CFRelease(theName);
177 }
178 }
179
180 return theName;
181 }
182
/*
 * NSStringEncoding values used by the two conversion routines that follow.
 */
enum {
    NSASCIIStringEncoding = 1,          /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13, /* Greek */
    NSWindowsCP1254StringEncoding = 14, /* Turkish */
    NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
    NSISO2022JPStringEncoding = 21,     /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
};

/*
 * Top bit of a 32-bit value.  It is OR-ed into a CFStringEncoding that has no
 * dedicated NSStringEncoding constant, and tested/stripped on the way back.
 * The constant is unsigned: shifting a signed 1 into bit 31 is undefined
 * behaviour and would make the mask a negative int.
 */
#define NSENCODING_MASK (1U << 31)
206
207 UInt32 CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
208 if (theEncoding == kCFStringEncodingUTF8) {
209 return NSUTF8StringEncoding;
210 } else {
211 theEncoding &= 0xFFF;
212 }
213 switch (theEncoding) {
214 case kCFStringEncodingASCII: return NSASCIIStringEncoding;
215 case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
216 case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
217 case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
218 case kCFStringEncodingUnicode: return NSUnicodeStringEncoding;
219 case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
220 case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
221 default:
222 return NSENCODING_MASK | theEncoding;
223 }
224 }
225
226 CFStringEncoding CFStringConvertNSStringEncodingToEncoding(UInt32 theEncoding) {
227 switch (theEncoding) {
228 case NSASCIIStringEncoding: return kCFStringEncodingASCII;
229 case NSNEXTSTEPStringEncoding: return kCFStringEncodingNextStepLatin;
230 case NSUTF8StringEncoding: return kCFStringEncodingUTF8;
231 case NSISOLatin1StringEncoding: return kCFStringEncodingISOLatin1;
232 case NSNonLossyASCIIStringEncoding: return kCFStringEncodingNonLossyASCII;
233 case NSUnicodeStringEncoding: return kCFStringEncodingUnicode;
234 case NSWindowsCP1252StringEncoding: return kCFStringEncodingWindowsLatin1;
235 case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
236 default:
237 return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
238 }
239 }
240
/* Offsets added to / subtracted from code page numbers elsewhere in this file. */
#define MACCODEPAGE_BASE (10000)
#define ISO8859CODEPAGE_BASE (28590)

/*
 * Code page numbers indexed by (encoding - 0x400).  0xFFFF marks a slot with
 * no code page; it is written out explicitly instead of relying on -1 being
 * converted to uint16_t.  The last slot of the 0x410 row used to read `01`
 * (octal 1), a typo for the "no code page" marker.
 */
static const uint16_t _CFToDOSCodePageList[] = {
    437, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 737, 775, 0xFFFF,
    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, // 0x400
    850, 851, 852, 855, 857, 860, 861, 862,
    863, 864, 865, 866, 869, 874, 0xFFFF, 0xFFFF, // 0x410
    932, 936, 949, 950, // 0x420
};

/*
 * Code page numbers for the 0x500 encoding range.
 * NOTE(review): _CFACPToCFTable pairs 1361 with 0x0510, so the last entry is
 * not at index (encoding - 0x500) -- confirm before indexing this list.
 */
static const uint16_t _CFToWindowsCodePageList[] = {
    1252, 1250, 1251, 1253, 1254, 1255, 1256, 1257, 1361,
};
253
254 UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
255 if (theEncoding == kCFStringEncodingUTF8) {
256 return 65001;
257 } else {
258 theEncoding &= 0xFFF;
259 }
260 return kCFStringEncodingInvalidId;
261 }
262
/*
 * Code page number -> encoding value pairs, consumed by bsearchEncoding().
 * Rows are in ascending order of `acp`, and the table ends with an
 * {0xFFFF, 0xFFFF} terminator row.
 */
static const struct {
    uint16_t acp;       /* code page number */
    uint16_t encoding;  /* encoding value paired with that code page */
} _CFACPToCFTable[] = {
    /* encodings 0x0400 - 0x0406 */
    {  437, 0x0400 },
    {  737, 0x0405 },
    {  775, 0x0406 },

    /* encodings 0x0410 - 0x041D */
    {  850, 0x0410 },
    {  851, 0x0411 },
    {  852, 0x0412 },
    {  855, 0x0413 },
    {  857, 0x0414 },
    {  860, 0x0415 },
    {  861, 0x0416 },
    {  862, 0x0417 },
    {  863, 0x0418 },
    {  864, 0x0419 },
    {  865, 0x041A },
    {  866, 0x041B },
    {  869, 0x041C },
    {  874, 0x041D },

    /* encodings 0x0420 - 0x0423 */
    {  932, 0x0420 },
    {  936, 0x0421 },
    {  949, 0x0422 },
    {  950, 0x0423 },

    /* encodings 0x0500 - 0x0510; note 1252 pairs with 0x0500 */
    { 1250, 0x0501 },
    { 1251, 0x0502 },
    { 1252, 0x0500 },
    { 1253, 0x0503 },
    { 1254, 0x0504 },
    { 1255, 0x0505 },
    { 1256, 0x0506 },
    { 1257, 0x0507 },
    { 1361, 0x0510 },

    /* terminator */
    { 0xFFFF, 0xFFFF },
};
299
300 static SInt32 bsearchEncoding(unsigned short target) {
301 const unsigned int *start, *end, *divider;
302 unsigned int size = sizeof(_CFACPToCFTable) / sizeof(UInt32);
303
304 start = (const unsigned int*)_CFACPToCFTable; end = (const unsigned int*)_CFACPToCFTable + (size - 1);
305 while (start <= end) {
306 divider = start + ((end - start) / 2);
307
308 if (*(const unsigned short*)divider == target) return *((const unsigned short*)divider + 1);
309 else if (*(const unsigned short*)divider > target) end = divider - 1;
310 else if (*(const unsigned short*)(divider + 1) > target) return *((const unsigned short*)divider + 1);
311 else start = divider + 1;
312 }
313 return (kCFStringEncodingInvalidId);
314 }
315
316 CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
317 if (theEncoding == 0 || theEncoding == 1) { // ID for default (system) codepage
318 return CFStringGetSystemEncoding();
319 } else if (theEncoding < MACCODEPAGE_BASE) { // MS CodePage
320 return bsearchEncoding(theEncoding);
321 } else if (theEncoding < 20000) { // MAC ScriptCode
322 return theEncoding - MACCODEPAGE_BASE;
323 } else if ((theEncoding - ISO8859CODEPAGE_BASE) < 10) { // ISO8859 range
324 return (theEncoding - ISO8859CODEPAGE_BASE) + 0x200;
325 } else {
326 switch (theEncoding) {
327 case 65001: return kCFStringEncodingUTF8;
328 case 20127: return kCFStringEncodingASCII;
329 default: return kCFStringEncodingInvalidId;
330 }
331 }
332 }
333
334 CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
335 CFStringEncoding macEncoding;
336
337 macEncoding = CFStringEncodingGetScriptCodeForEncoding(encoding);
338
339 return macEncoding;
340 }
341