]> git.saurik.com Git - apple/cf.git/blob - StringEncodings.subproj/CFStringEncodingConverter.c
CF-299.35.tar.gz
[apple/cf.git] / StringEncodings.subproj / CFStringEncodingConverter.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFStringEncodingConverter.c
26 Copyright 1998-2002, Apple, Inc. All rights reserved.
27 Responsibility: Aki Inoue
28 */
29
30 #include "CFInternal.h"
31 #include <CoreFoundation/CFArray.h>
32 #include <CoreFoundation/CFDictionary.h>
33 #include "CFUniChar.h"
34 #include "CFUtilities.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFStringEncodingConverterExt.h"
37 #include "CFStringEncodingConverterPriv.h"
38 #include <stdlib.h>
39 #if !defined(__MACOS8__)
40 #ifdef __WIN32__
41 #include <windows.h>
42 #else // Mach, HP-UX, Solaris
43 #include <pthread.h>
44 #endif
45 #endif __MACOS8__
46
47
/* Macros
*/
/* Dispatch helpers for a _CFEncodingConverter.
 *
 * When the private slot (_toBytes / _toUnicode) is non-NULL, the public slot
 * is called with the converter itself as the first argument; otherwise the
 * public slot is cast to the plain proc type and called without it.
 * TO_UNICODE additionally routes to toCanonicalUnicode when either of the
 * canonical-decomposition flags is set.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. `a | b` passed as flags) expand with the intended precedence.
 * Note that conv and flags are still evaluated more than once, so they must
 * not have side effects.
 */
#define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) ((conv)->_toBytes ? (conv)->toBytes((conv),(flags),(chars),(numChars),(bytes),(max),(used)) : ((CFStringEncodingToBytesProc)(conv)->toBytes)((flags),(chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) ((conv)->_toUnicode ? (((flags) & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) ? (conv)->toCanonicalUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used)) : (conv)->toUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used))) : ((CFStringEncodingToUnicodeProc)(conv)->toUnicode)((flags),(bytes),(numBytes),(chars),(max),(used)))

/* Unicode code points and UTF-16 surrogate ranges used by the fallback code. */
#define LineSeparator 0x2028      /* U+2028 LINE SEPARATOR */
#define ParagraphSeparator 0x2029 /* U+2029 PARAGRAPH SEPARATOR */
#define ASCIINewLine 0x0a
#define kSurrogateHighStart 0xD800
#define kSurrogateHighEnd 0xDBFF
#define kSurrogateLowStart 0xDC00
#define kSurrogateLowEnd 0xDFFF
60
/* Mapping U+00A0..U+00FF (the printable Latin-1 supplement) to lossy ASCII.
 * Entry i describes code point 0xA0 + i.  Each entry holds up to four ASCII
 * bytes; a 0 byte terminates the replacement early, and an all-zero entry
 * means the character has no replacement.
*/
static const struct {
    unsigned char chars[4];
} _toLossyASCIITable[] = {
    {{' ', 0, 0, 0}}, // NO-BREAK SPACE
    {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
    {{'c', 0, 0, 0}}, // CENT SIGN
    {{'L', 0, 0, 0}}, // POUND SIGN
    {{'$', 0, 0, 0}}, // CURRENCY SIGN
    {{'Y', 0, 0, 0}}, // YEN SIGN
    {{'|', 0, 0, 0}}, // BROKEN BAR
    {{0, 0, 0, 0}}, // SECTION SIGN
    {{0, 0, 0, 0}}, // DIAERESIS
    {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
    {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
    {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{0, 0, 0, 0}}, // NOT SIGN
    {{'-', 0, 0, 0}}, // SOFT HYPHEN
    {{'(', 'R', ')', 0}}, // REGISTERED SIGN
    {{0, 0, 0, 0}}, // MACRON
    {{0, 0, 0, 0}}, // DEGREE SIGN
    {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
    {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
    {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
    {{0, 0, 0, 0}}, // ACUTE ACCENT
    {{0, 0, 0, 0}}, // MICRO SIGN
    {{0, 0, 0, 0}}, // PILCROW SIGN
    {{0, 0, 0, 0}}, // MIDDLE DOT
    {{0, 0, 0, 0}}, // CEDILLA
    {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
    {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
    {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
    {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
    {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
    {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
    {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
    {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
    {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
    {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
    {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic) -- uppercase, like every other capital
    {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
    {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
    {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic) -- lowercase, like every other small letter
    {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
    {{'/', 0, 0, 0}}, // DIVISION SIGN
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
};
163
164 CF_INLINE UInt32 __CFToASCIILatin1Fallback(UniChar character, UInt8 *bytes, UInt32 maxByteLen) {
165 const char *losChars = (const unsigned char*)_toLossyASCIITable + (character - 0xA0) * sizeof(unsigned char[4]);
166 unsigned int numBytes = 0;
167 int idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
168
169 for (idx = 0;idx < max;idx++) {
170 if (losChars[idx]) {
171 if (maxByteLen) bytes[idx] = losChars[idx];
172 ++numBytes;
173 } else {
174 break;
175 }
176 }
177
178 return numBytes;
179 }
180
181 static UInt32 __CFDefaultToBytesFallbackProc(const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
182 if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
183 if (maxByteLen) *bytes = (UInt8)(*characters - 0x80);
184 *usedByteLen = 1;
185 return 1;
186 } else if (*characters < 0x100) {
187 *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
188 return 1;
189 } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
190 if (maxByteLen) *bytes = '?';
191 *usedByteLen = 1;
192 return (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
193 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
194 if (maxByteLen) *bytes = ' ';
195 *usedByteLen = 1;
196 return 1;
197 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
198 if (maxByteLen) *bytes = ASCIINewLine;
199 *usedByteLen = 1;
200 return 1;
201 } else if (!CFUniCharIsMemberOf(*characters, kCFUniCharLetterCharacterSet)) {
202 *usedByteLen = 0;
203 return 1;
204 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
205 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
206
207 (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
208 if (*decomposed < 0x80) {
209 if (maxByteLen) *bytes = (UInt8)(*decomposed);
210 *usedByteLen = 1;
211 return 1;
212 } else {
213 UTF16Char theChar = *decomposed;
214
215 return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
216 }
217 } else {
218 if (maxByteLen) *bytes = '?';
219 *usedByteLen = 1;
220 return 1;
221 }
222 }
223
224 static UInt32 __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
225 if (maxCharLen) *characters = (UniChar)'?';
226 *usedCharLen = 1;
227 return 1;
228 }
229
/* Invoke the converter's lossy fallback procs.  Arguments are parenthesized
 * so that compound expressions expand with the intended precedence.
 */
#define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) ((conv)->toBytesFallback((chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) ((conv)->toUnicodeFallback((bytes),(numBytes),(chars),(max),(used)))

#define EXTRA_BASE (0x0F00)
234
235 /* Wrapper funcs for non-standard converters
236 */
237 static UInt32 __CFToBytesCheapEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
238 UInt32 processedCharLen = 0;
239 UInt32 length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
240 uint8_t byte;
241
242 while (processedCharLen < length) {
243 if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], &byte)) break;
244
245 if (maxByteLen) bytes[processedCharLen] = byte;
246 processedCharLen++;
247 }
248
249 *usedByteLen = processedCharLen;
250 return processedCharLen;
251 }
252
253 static UInt32 __CFToUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
254 UInt32 processedByteLen = 0;
255 UInt32 length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
256 UniChar character;
257
258 while (processedByteLen < length) {
259 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
260
261 if (maxCharLen) characters[processedByteLen] = character;
262 processedByteLen++;
263 }
264
265 *usedCharLen = processedByteLen;
266 return processedByteLen;
267 }
268
269 static UInt32 __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
270 UInt32 processedByteLen = 0;
271 UInt32 theUsedCharLen = 0;
272 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
273 UInt32 usedLen;
274 UniChar character;
275 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
276
277 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
278 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
279
280 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
281 uint32_t idx;
282
283 usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
284 *usedCharLen = theUsedCharLen;
285
286 for (idx = 0;idx < usedLen;idx++) {
287 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
288 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
289 theUsedCharLen += 2;
290 if (maxCharLen) {
291 charBuffer[idx] = charBuffer[idx] - 0x10000;
292 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
293 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
294 }
295 } else {
296 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
297 ++theUsedCharLen;
298 *(characters++) = charBuffer[idx];
299 }
300 }
301 } else {
302 if (maxCharLen) *(characters++) = character;
303 ++theUsedCharLen;
304 }
305 processedByteLen++;
306 }
307
308 *usedCharLen = theUsedCharLen;
309 return processedByteLen;
310 }
311
312 static UInt32 __CFToBytesStandardEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
313 UInt32 processedCharLen = 0;
314 uint8_t byte;
315 UInt32 usedLen;
316
317 *usedByteLen = 0;
318
319 while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
320 if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters, numChars, &byte))) break;
321
322 if (maxByteLen) bytes[*usedByteLen] = byte;
323 (*usedByteLen)++;
324 characters += usedLen;
325 numChars -= usedLen;
326 processedCharLen += usedLen;
327 }
328
329 return processedCharLen;
330 }
331
332 static UInt32 __CFToUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
333 UInt32 processedByteLen = 0;
334 #if defined(__MACOS8__) || defined(__WIN32__)
335 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
336 #else
337 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
338 #endif
339 UInt32 usedLen;
340
341 *usedCharLen = 0;
342
343 while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
344 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
345
346 if (maxCharLen) {
347 uint16_t idx;
348
349 if (*usedCharLen + usedLen > maxCharLen) break;
350
351 for (idx = 0;idx < usedLen;idx++) {
352 characters[*usedCharLen + idx] = charBuffer[idx];
353 }
354 }
355 *usedCharLen += usedLen;
356 processedByteLen++;
357 }
358
359 return processedByteLen;
360 }
361
362 static UInt32 __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
363 UInt32 processedByteLen = 0;
364 #if defined(__MACOS8__) || defined(__WIN32__)
365 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
366 #else
367 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
368 #endif
369 UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
370 UInt32 usedLen;
371 UInt32 decompedLen;
372 UInt32 idx, decompIndex;
373 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
374 UInt32 theUsedCharLen = 0;
375
376 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
377 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
378
379 for (idx = 0;idx < usedLen;idx++) {
380 if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
381 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
382 *usedCharLen = theUsedCharLen;
383
384 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
385 if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
386 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
387 theUsedCharLen += 2;
388 if (maxCharLen) {
389 charBuffer[idx] = charBuffer[idx] - 0x10000;
390 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
391 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
392 }
393 } else {
394 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
395 ++theUsedCharLen;
396 *(characters++) = charBuffer[idx];
397 }
398 }
399 } else {
400 if (maxCharLen) *(characters++) = charBuffer[idx];
401 ++theUsedCharLen;
402 }
403 }
404 processedByteLen++;
405 }
406
407 *usedCharLen = theUsedCharLen;
408 return processedByteLen;
409 }
410
411 static UInt32 __CFToBytesCheapMultiByteWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
412 UInt32 processedCharLen = 0;
413 #if defined(__MACOS8__) || defined(__WIN32__)
414 uint8_t byteBuffer[20]; // Dynamic stack allocation is GNU specific
415 #else
416 uint8_t byteBuffer[((const _CFEncodingConverter*)converter)->maxLen];
417 #endif
418 UInt32 usedLen;
419
420 *usedByteLen = 0;
421
422 while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
423 if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
424
425 if (maxByteLen) {
426 uint16_t idx;
427
428 if (*usedByteLen + usedLen > maxByteLen) break;
429
430 for (idx = 0;idx <usedLen;idx++) {
431 bytes[*usedByteLen + idx] = byteBuffer[idx];
432 }
433 }
434
435 *usedByteLen += usedLen;
436 processedCharLen++;
437 }
438
439 return processedCharLen;
440 }
441
442 static UInt32 __CFToUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
443 UInt32 processedByteLen = 0;
444 UniChar character;
445 UInt32 usedLen;
446
447 *usedCharLen = 0;
448
449 while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
450 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
451
452 if (maxCharLen) *(characters++) = character;
453 (*usedCharLen)++;
454 processedByteLen += usedLen;
455 bytes += usedLen;
456 numBytes -= usedLen;
457 }
458
459 return processedByteLen;
460 }
461
462 static UInt32 __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
463 UInt32 processedByteLen = 0;
464 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
465 UniChar character;
466 UInt32 usedLen;
467 UInt32 decomposedLen;
468 UInt32 theUsedCharLen = 0;
469 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
470
471 while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
472 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
473
474 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
475 uint32_t idx;
476
477 decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
478 *usedCharLen = theUsedCharLen;
479
480 for (idx = 0;idx < decomposedLen;idx++) {
481 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
482 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
483 theUsedCharLen += 2;
484 if (maxCharLen) {
485 charBuffer[idx] = charBuffer[idx] - 0x10000;
486 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
487 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
488 }
489 } else {
490 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
491 ++theUsedCharLen;
492 *(characters++) = charBuffer[idx];
493 }
494 }
495 } else {
496 if (maxCharLen) *(characters++) = character;
497 ++theUsedCharLen;
498 }
499
500 processedByteLen += usedLen;
501 bytes += usedLen;
502 numBytes -= usedLen;
503 }
504 *usedCharLen = theUsedCharLen;
505 return processedByteLen;
506 }
507
508 /* static functions
509 */
510 static _CFConverterEntry __CFConverterEntryASCII = {
511 kCFStringEncodingASCII, NULL,
512 "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL}, NULL, NULL, NULL, NULL,
513 kCFStringEncodingMacRoman // We use string encoding's script range here
514 };
515
516 static _CFConverterEntry __CFConverterEntryISOLatin1 = {
517 kCFStringEncodingISOLatin1, NULL,
518 "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL}, NULL, NULL, NULL, NULL,
519 kCFStringEncodingMacRoman // We use string encoding's script range here
520 };
521
522 static _CFConverterEntry __CFConverterEntryMacRoman = {
523 kCFStringEncodingMacRoman, NULL,
524 "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL}, NULL, NULL, NULL, NULL,
525 kCFStringEncodingMacRoman // We use string encoding's script range here
526 };
527
528 static _CFConverterEntry __CFConverterEntryWinLatin1 = {
529 kCFStringEncodingWindowsLatin1, NULL,
530 "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL}, NULL, NULL, NULL, NULL,
531 kCFStringEncodingMacRoman // We use string encoding's script range here
532 };
533
534 static _CFConverterEntry __CFConverterEntryNextStepLatin = {
535 kCFStringEncodingNextStepLatin, NULL,
536 "Western (NextStep)", {"x-nextstep", NULL, NULL, NULL}, NULL, NULL, NULL, NULL,
537 kCFStringEncodingMacRoman // We use string encoding's script range here
538 };
539
540 static _CFConverterEntry __CFConverterEntryUTF8 = {
541 kCFStringEncodingUTF8, NULL,
542 "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL, NULL}, NULL, NULL, NULL, NULL,
543 kCFStringEncodingUnicode // We use string encoding's script range here
544 };
545
546 CF_INLINE _CFConverterEntry *__CFStringEncodingConverterGetEntry(UInt32 encoding) {
547 switch (encoding) {
548 case kCFStringEncodingInvalidId:
549 case kCFStringEncodingASCII:
550 return &__CFConverterEntryASCII;
551
552 case kCFStringEncodingISOLatin1:
553 return &__CFConverterEntryISOLatin1;
554
555 case kCFStringEncodingMacRoman:
556 return &__CFConverterEntryMacRoman;
557
558 case kCFStringEncodingWindowsLatin1:
559 return &__CFConverterEntryWinLatin1;
560
561 case kCFStringEncodingNextStepLatin:
562 return &__CFConverterEntryNextStepLatin;
563
564 case kCFStringEncodingUTF8:
565 return &__CFConverterEntryUTF8;
566
567 default: return NULL;
568 }
569 }
570
571 CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition) {
572 #define NUM_OF_ENTRIES_CYCLE (10)
573 static CFSpinLock_t _indexLock = 0;
574 static UInt32 _currentIndex = 0;
575 static UInt32 _allocatedSize = 0;
576 static _CFEncodingConverter *_allocatedEntries = NULL;
577 _CFEncodingConverter *converter;
578
579
580 __CFSpinLock(&_indexLock);
581 if ((_currentIndex + 1) >= _allocatedSize) {
582 _currentIndex = 0;
583 _allocatedSize = 0;
584 _allocatedEntries = NULL;
585 }
586 if (_allocatedEntries == NULL) { // Not allocated yet
587 _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(NULL, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
588 _allocatedSize = NUM_OF_ENTRIES_CYCLE;
589 converter = &(_allocatedEntries[_currentIndex]);
590 } else {
591 converter = &(_allocatedEntries[++_currentIndex]);
592 }
593 __CFSpinUnlock(&_indexLock);
594
595 switch (definition->encodingClass) {
596 case kCFStringEncodingConverterStandard:
597 converter->toBytes = definition->toBytes;
598 converter->toUnicode = definition->toUnicode;
599 converter->toCanonicalUnicode = definition->toUnicode;
600 converter->_toBytes = NULL;
601 converter->_toUnicode = NULL;
602 converter->maxLen = 2;
603 break;
604
605 case kCFStringEncodingConverterCheapEightBit:
606 converter->toBytes = __CFToBytesCheapEightBitWrapper;
607 converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
608 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
609 converter->_toBytes = definition->toBytes;
610 converter->_toUnicode = definition->toUnicode;
611 converter->maxLen = 1;
612 break;
613
614 case kCFStringEncodingConverterStandardEightBit:
615 converter->toBytes = __CFToBytesStandardEightBitWrapper;
616 converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
617 converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
618 converter->_toBytes = definition->toBytes;
619 converter->_toUnicode = definition->toUnicode;
620 converter->maxLen = definition->maxDecomposedCharLen;
621 break;
622
623 case kCFStringEncodingConverterCheapMultiByte:
624 converter->toBytes = __CFToBytesCheapMultiByteWrapper;
625 converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
626 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
627 converter->_toBytes = definition->toBytes;
628 converter->_toUnicode = definition->toUnicode;
629 converter->maxLen = definition->maxBytesPerChar;
630 break;
631
632 case kCFStringEncodingConverterPlatformSpecific:
633 converter->toBytes = NULL;
634 converter->toUnicode = NULL;
635 converter->toCanonicalUnicode = NULL;
636 converter->_toBytes = NULL;
637 converter->_toUnicode = NULL;
638 converter->maxLen = 0;
639 converter->toBytesLen = NULL;
640 converter->toUnicodeLen = NULL;
641 converter->toBytesFallback = NULL;
642 converter->toUnicodeFallback = NULL;
643 converter->toBytesPrecompose = NULL;
644 converter->isValidCombiningChar = NULL;
645 return converter;
646
647 default: // Shouln't be here
648 return NULL;
649 }
650
651 converter->toBytesLen = (definition->toBytesLen ? definition->toBytesLen : (CFStringEncodingToBytesLenProc)(UInt32)definition->maxBytesPerChar);
652 converter->toUnicodeLen = (definition->toUnicodeLen ? definition->toUnicodeLen : (CFStringEncodingToUnicodeLenProc)(UInt32)definition->maxDecomposedCharLen);
653 converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
654 converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
655 converter->toBytesPrecompose = (definition->toBytesPrecompose ? definition->toBytesPrecompose : NULL);
656 converter->isValidCombiningChar = (definition->isValidCombiningChar ? definition->isValidCombiningChar : NULL);
657
658 return converter;
659 }
660
661 CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(_CFConverterEntry *entry) {
662 if (!entry) return NULL;
663
664 switch (entry->encoding) {
665 case kCFStringEncodingASCII:
666 return &__CFConverterASCII;
667
668 case kCFStringEncodingISOLatin1:
669 return &__CFConverterISOLatin1;
670
671 case kCFStringEncodingMacRoman:
672 return &__CFConverterMacRoman;
673
674 case kCFStringEncodingWindowsLatin1:
675 return &__CFConverterWinLatin1;
676
677 case kCFStringEncodingNextStepLatin:
678 return &__CFConverterNextStepLatin;
679
680 case kCFStringEncodingUTF8:
681 return &__CFConverterUTF8;
682
683 default:
684 return NULL;
685 }
686 }
687
688 static const _CFEncodingConverter *__CFGetConverter(UInt32 encoding) {
689 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
690
691 if (!entry) return NULL;
692
693 if (!entry->converter) {
694 const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(entry);
695
696 if (definition) {
697 entry->converter = __CFEncodingConverterFromDefinition(definition);
698 entry->toBytesFallback = definition->toBytesFallback;
699 entry->toUnicodeFallback = definition->toUnicodeFallback;
700 }
701 }
702
703 return (_CFEncodingConverter *)entry->converter;
704 }
705
706 /* Public API
707 */
708 UInt32 CFStringEncodingUnicodeToBytes(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars, UInt32 *usedCharLen, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
709 if (encoding == kCFStringEncodingUTF8) {
710 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
711 uint32_t convertedCharLen;
712 uint32_t usedLen;
713
714
715 if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
716 (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
717 } else {
718 if (!__CFToUTF8) {
719 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
720 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
721 }
722 convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, (UInt32 *)&usedLen);
723 }
724 if (usedCharLen) *usedCharLen = convertedCharLen;
725 if (usedByteLen) *usedByteLen = usedLen;
726
727 if (convertedCharLen == numChars) {
728 return kCFStringEncodingConversionSuccess;
729 } else if (maxByteLen && (maxByteLen == usedLen)) {
730 return kCFStringEncodingInsufficientOutputBufferLength;
731 } else {
732 return kCFStringEncodingInvalidInputStream;
733 }
734 } else {
735 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
736 UInt32 usedLen = 0;
737 UInt32 localUsedByteLen;
738 UInt32 theUsedByteLen = 0;
739 UInt32 theResult = kCFStringEncodingConversionSuccess;
740 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
741 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
742
743 if (!converter) return kCFStringEncodingConverterUnavailable;
744
745 if (flags & kCFStringEncodingSubstituteCombinings) {
746 if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->isValidCombiningChar;
747 } else {
748 isValidCombiningChar = converter->isValidCombiningChar;
749 if (!(flags & kCFStringEncodingIgnoreCombinings)) {
750 toBytesPrecompose = converter->toBytesPrecompose;
751 flags |= kCFStringEncodingComposeCombinings;
752 }
753 }
754
755
756 while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
757 if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
758 UInt32 dummy;
759
760 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
761 if (toBytesPrecompose) {
762 UInt32 localUsedLen = usedLen;
763
764 while (isValidCombiningChar(characters[--usedLen]));
765 theUsedByteLen += localUsedByteLen;
766 if (converter->maxLen > 1) {
767 TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
768 theUsedByteLen -= localUsedByteLen;
769 } else {
770 theUsedByteLen--;
771 }
772 if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
773 usedLen += localUsedLen;
774 if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
775 theUsedByteLen += localUsedByteLen;
776 theResult = kCFStringEncodingInvalidInputStream;
777 break;
778 }
779 } else if (flags & kCFStringEncodingAllowLossyConversion) {
780 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
781
782 if (lossyByte) {
783 while (isValidCombiningChar(characters[++usedLen]));
784 localUsedByteLen = 1;
785 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
786 } else {
787 ++usedLen;
788 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
789 }
790 } else {
791 theResult = kCFStringEncodingInvalidInputStream;
792 break;
793 }
794 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
795 theUsedByteLen += localUsedByteLen;
796 theResult = kCFStringEncodingInsufficientOutputBufferLength;
797 break;
798 } else if (flags & kCFStringEncodingIgnoreCombinings) {
799 while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
800 } else {
801 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
802
803 theUsedByteLen += localUsedByteLen;
804 if (lossyByte) {
805 ++usedLen;
806 localUsedByteLen = 1;
807 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
808 } else {
809 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
810 }
811 }
812 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
813 theUsedByteLen += localUsedByteLen;
814
815 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
816 UInt32 localUsedLen;
817
818 localUsedByteLen = 0;
819 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
820 }
821 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
822 break;
823 } else if (flags & kCFStringEncodingAllowLossyConversion) {
824 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
825
826 theUsedByteLen += localUsedByteLen;
827 if (lossyByte) {
828 ++usedLen;
829 localUsedByteLen = 1;
830 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
831 } else {
832 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
833 }
834 } else {
835 theUsedByteLen += localUsedByteLen;
836 theResult = kCFStringEncodingInvalidInputStream;
837 break;
838 }
839 }
840 theUsedByteLen += localUsedByteLen;
841 }
842
843 if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
844 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
845 UInt32 localUsedLen;
846
847 localUsedByteLen = 0;
848 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
849 }
850 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
851 }
852 if (usedByteLen) *usedByteLen = theUsedByteLen;
853 if (usedCharLen) *usedCharLen = usedLen;
854
855 return theResult;
856 }
857 }
858
859 UInt32 CFStringEncodingBytesToUnicode(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UInt32 *usedByteLen, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
860 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
861 UInt32 usedLen = 0;
862 UInt32 theUsedCharLen = 0;
863 UInt32 localUsedCharLen;
864 UInt32 theResult = kCFStringEncodingConversionSuccess;
865
866 if (!converter) return kCFStringEncodingConverterUnavailable;
867
868
869 while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
870 if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
871 UInt32 tempUsedCharLen;
872
873 if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || ((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
874 theUsedCharLen += localUsedCharLen;
875 theResult = kCFStringEncodingInsufficientOutputBufferLength;
876 break;
877 } else if (flags & kCFStringEncodingAllowLossyConversion) {
878 theUsedCharLen += localUsedCharLen;
879 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
880 } else {
881 theUsedCharLen += localUsedCharLen;
882 theResult = kCFStringEncodingInvalidInputStream;
883 break;
884 }
885 }
886 theUsedCharLen += localUsedCharLen;
887 }
888
889 if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
890 theResult = kCFStringEncodingInsufficientOutputBufferLength;
891 }
892 if (usedCharLen) *usedCharLen = theUsedCharLen;
893 if (usedByteLen) *usedByteLen = usedLen;
894
895 return theResult;
896 }
897
898 __private_extern__ Boolean CFStringEncodingIsValidEncoding(UInt32 encoding) {
899 return (CFStringEncodingGetConverter(encoding) ? true : false);
900 }
901
902 __private_extern__ const char *CFStringEncodingName(UInt32 encoding) {
903 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
904 if (entry) return entry->encodingName;
905 return NULL;
906 }
907
908 __private_extern__ const char **CFStringEncodingCanonicalCharsetNames(UInt32 encoding) {
909 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
910 if (entry) return entry->ianaNames;
911 return NULL;
912 }
913
914 __private_extern__ UInt32 CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding) {
915 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
916
917 return (entry ? entry->scriptCode : (encoding == kCFStringEncodingUnicode ? kCFStringEncodingUnicode : (encoding < 0xFF ? encoding : kCFStringEncodingInvalidId)));
918 }
919
920 __private_extern__ UInt32 CFStringEncodingCharLengthForBytes(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes) {
921 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
922
923 if (converter) {
924 UInt32 switchVal = (UInt32)(converter->toUnicodeLen);
925
926 if (switchVal < 0xFFFF)
927 return switchVal * numBytes;
928 else
929 return converter->toUnicodeLen(flags, bytes, numBytes);
930 }
931
932 return 0;
933 }
934
935 __private_extern__ UInt32 CFStringEncodingByteLengthForCharacters(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars) {
936 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
937
938 if (converter) {
939 UInt32 switchVal = (UInt32)(converter->toBytesLen);
940
941 if (switchVal < 0xFFFF)
942 return switchVal * numChars;
943 else
944 return converter->toBytesLen(flags, characters, numChars);
945 }
946
947 return 0;
948 }
949
950 __private_extern__ void CFStringEncodingRegisterFallbackProcedures(UInt32 encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
951 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
952
953 if (entry && __CFGetConverter(encoding)) {
954 ((_CFEncodingConverter*)entry->converter)->toBytesFallback = (toBytes ? toBytes : entry->toBytesFallback);
955 ((_CFEncodingConverter*)entry->converter)->toUnicodeFallback = (toUnicode ? toUnicode : entry->toUnicodeFallback);
956 }
957 }
958
959 __private_extern__ const CFStringEncodingConverter *CFStringEncodingGetConverter(UInt32 encoding) {
960 return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding));
961 }
962
963 static const UInt32 __CFBuiltinEncodings[] = {
964 kCFStringEncodingMacRoman,
965 kCFStringEncodingWindowsLatin1,
966 kCFStringEncodingISOLatin1,
967 kCFStringEncodingNextStepLatin,
968 kCFStringEncodingASCII,
969 kCFStringEncodingUTF8,
970 /* These two are available only in CFString-level */
971 kCFStringEncodingUnicode,
972 kCFStringEncodingNonLossyASCII,
973 kCFStringEncodingInvalidId,
974 };
975
976
977 __private_extern__ const UInt32 *CFStringEncodingListOfAvailableEncodings(void) {
978 return __CFBuiltinEncodings;
979 }
980