2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
23 /* CFStringEncodingConverter.c
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFArray.h>
30 #include <CoreFoundation/CFDictionary.h>
31 #include "CFUniChar.h"
32 #include "CFUtilitiesPriv.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFStringEncodingConverterExt.h"
35 #include "CFStringEncodingConverterPriv.h"
37 #if !defined(__MACOS8__)
40 #else // Mach, HP-UX, Solaris
// Runs a converter's Unicode-to-bytes routine. When conv->_toBytes is set, conv->toBytes is called
// with the converter itself as the first argument; otherwise conv->toBytes is cast to
// CFStringEncodingToBytesProc and called without the converter argument.
// NOTE(review): 'conv' is expanded without parentheses, so pass a simple expression.
48 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->_toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->toBytes)(flags,chars,numChars,bytes,max,used))
// Runs a converter's bytes-to-Unicode routine. When conv->_toUnicode is set, the call goes to
// conv->toCanonicalUnicode if flags contains kCFStringEncodingUseCanonical or
// kCFStringEncodingUseHFSPlusCanonical, and to conv->toUnicode otherwise (both receive the converter
// as the first argument). When conv->_toUnicode is not set, conv->toUnicode is cast to
// CFStringEncodingToUnicodeProc and called without the converter argument.
// NOTE(review): 'conv' and 'flags' are expanded without parentheses, so pass simple expressions.
49 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->_toUnicode ? (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->toUnicode)(flags,bytes,numBytes,chars,max,used))
// U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR.
51 #define LineSeparator 0x2028
52 #define ParagraphSeparator 0x2029
// ASCII line feed; the default to-bytes fallback writes it for newline-class characters.
53 #define ASCIINewLine 0x0a
// UTF-16 surrogate code unit ranges: high (leading) 0xD800-0xDBFF, low (trailing) 0xDC00-0xDFFF.
54 #define kSurrogateHighStart 0xD800
55 #define kSurrogateHighEnd 0xDBFF
56 #define kSurrogateLowStart 0xDC00
57 #define kSurrogateLowEnd 0xDFFF
59 /* Mapping 160..255 (U+00A0..U+00FF) to lossy ASCII
// Each row holds up to four replacement ASCII bytes for one character; unused trailing slots are 0
// and a row of all zeros carries no replacement text. Rows are in code point order starting at
// U+00A0, which is why the lookup subtracts 0xA0 from the character to find its row.
62 unsigned char chars
[4];
63 } _toLossyASCIITable
[] = {
64 {{' ', 0, 0, 0}}, // NO-BREAK SPACE
65 {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
66 {{'c', 0, 0, 0}}, // CENT SIGN
67 {{'L', 0, 0, 0}}, // POUND SIGN
68 {{'$', 0, 0, 0}}, // CURRENCY SIGN
69 {{'Y', 0, 0, 0}}, // YEN SIGN
70 {{'|', 0, 0, 0}}, // BROKEN BAR
71 {{0, 0, 0, 0}}, // SECTION SIGN
72 {{0, 0, 0, 0}}, // DIAERESIS
73 {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
74 {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
75 {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
76 {{0, 0, 0, 0}}, // NOT SIGN
77 {{'-', 0, 0, 0}}, // SOFT HYPHEN
78 {{'(', 'R', ')', 0}}, // REGISTERED SIGN
79 {{0, 0, 0, 0}}, // MACRON
80 {{0, 0, 0, 0}}, // DEGREE SIGN
81 {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
82 {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
83 {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
84 {{0, 0, 0, 0}}, // ACUTE ACCENT
85 {{0, 0, 0, 0}}, // MICRO SIGN
86 {{0, 0, 0, 0}}, // PILCROW SIGN
87 {{0, 0, 0, 0}}, // MIDDLE DOT
88 {{0, 0, 0, 0}}, // CEDILLA
89 {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
90 {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
91 {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
92 {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
93 {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
94 {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
95 {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
96 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
97 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
98 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
99 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
100 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
101 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
102 {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
103 {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
104 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
105 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
106 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
107 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
108 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
109 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
110 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
111 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
112 {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
113 {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
114 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
115 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
116 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
117 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
118 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
119 {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
120 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
121 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
122 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
123 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
124 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
125 {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
// Capital letter gets the upper-case replacement (this row previously held lower-case 't','h').
126 {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
127 {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
128 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
129 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
130 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
131 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
132 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
133 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
134 {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
135 {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
136 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
137 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
138 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
139 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
140 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
141 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
142 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
143 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
// Small letter gets the lower-case replacement (this row previously held upper-case 'T','H').
144 {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
145 {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
146 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
147 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
148 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
149 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
150 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
151 {{'/', 0, 0, 0}}, // DIVISION SIGN
152 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
153 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
154 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
155 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
156 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
157 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
158 {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
159 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
// Copies the lossy-ASCII stand-in for one Latin-1 character out of _toLossyASCIITable.
//   character  - code point to look up; 0xA0 is subtracted to form the row index
//   bytes      - destination; written only when maxByteLen is non-zero
//   maxByteLen - destination capacity; 1..3 shortens the copy, 0 or >= 4 allows all four slots
162 CF_INLINE UInt32
__CFToASCIILatin1Fallback(UniChar character
, UInt8
*bytes
, UInt32 maxByteLen
) {
// Address of the 4-byte table row for this character.
// NOTE(review): the cast yields 'const unsigned char *' but the variable is 'const char *' -
// pointer signedness mismatch; confirm which type is intended.
163 const char *losChars
= (const unsigned char*)_toLossyASCIITable
+ (character
- 0xA0) * sizeof(unsigned char[4]);
164 unsigned int numBytes
= 0;
// Slots to visit: 4, or maxByteLen when that is non-zero and smaller than 4.
165 int idx
, max
= (maxByteLen
&& (maxByteLen
< 4) ? maxByteLen
: 4);
167 for (idx
= 0;idx
< max
;idx
++) {
// Store the replacement byte only when an output buffer was supplied.
169 if (maxByteLen
) bytes
[idx
] = losChars
[idx
];
// Default Unicode-to-bytes fallback: picks an ASCII stand-in for *characters according to its value
// or character class. In every branch shown, a byte is stored only when maxByteLen is non-zero.
//   characters/numChars - input; only the first character (and, for surrogates, the second) is examined
//   bytes/maxByteLen    - output buffer and capacity
//   usedByteLen         - receives the number of bytes produced
179 static UInt32
__CFDefaultToBytesFallbackProc(const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
180 if (*characters
< 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
181 if (maxByteLen
) *bytes
= (UInt8
)(*characters
- 0x80);
// 0xA0..0xFF: use the lossy ASCII table.
184 } else if (*characters
< 0x100) {
185 *usedByteLen
= __CFToASCIILatin1Fallback(*characters
, bytes
, maxByteLen
);
// Any surrogate code unit is replaced by '?'.
187 } else if (*characters
>= kSurrogateHighStart
&& *characters
<= kSurrogateLowEnd
) {
188 if (maxByteLen
) *bytes
= '?';
// Consumes two characters when the next one is a low surrogate completing the pair, otherwise one.
// NOTE(review): the leading-unit test is '<= kSurrogateLowStart', which also accepts 0xDC00 (a low
// surrogate) as the first unit; '<= kSurrogateHighEnd' looks intended - confirm.
190 return (numChars
> 1 && *characters
<= kSurrogateLowStart
&& *(characters
+ 1) >= kSurrogateLowStart
&& *(characters
+ 1) <= kSurrogateLowEnd
? 2 : 1);
// Whitespace becomes an ASCII space.
191 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceCharacterSet
)) {
192 if (maxByteLen
) *bytes
= ' ';
// Remaining members of the whitespace-and-newline set become an ASCII newline.
195 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceAndNewlineCharacterSet
)) {
196 if (maxByteLen
) *bytes
= ASCIINewLine
;
199 } else if (!CFUniCharIsMemberOf(*characters
, kCFUniCharLetterCharacterSet
)) {
// Decomposable letter: decompose it and work from the first decomposed character.
202 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharDecomposableCharacterSet
)) {
203 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
205 (void)CFUniCharDecomposeCharacter(*characters
, decomposed
, MAX_DECOMPOSED_LENGTH
);
// First decomposed character is ASCII: emit it directly.
206 if (*decomposed
< 0x80) {
207 if (maxByteLen
) *bytes
= (UInt8
)(*decomposed
);
// Otherwise narrow it to a UTF16Char and run this fallback again on that single character.
211 UTF16Char theChar
= *decomposed
;
213 return __CFDefaultToBytesFallbackProc(&theChar
, 1, bytes
, maxByteLen
, usedByteLen
);
// No better substitute: emit '?'.
216 if (maxByteLen
) *bytes
= '?';
// Default bytes-to-Unicode fallback: the replacement character is '?', stored in *characters only
// when maxCharLen is non-zero. The input bytes are not examined in the statement shown.
222 static UInt32
__CFDefaultToUnicodeFallbackProc(const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
223 if (maxCharLen
) *characters
= (UniChar
)'?';
// Call the fallback procedure stored on the converter (conv->toBytesFallback / conv->toUnicodeFallback).
// Unlike TO_BYTE/TO_UNICODE, these take no flags and do not pass the converter through.
228 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
229 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
// NOTE(review): EXTRA_BASE has no visible use in this part of the file - confirm it is still needed.
231 #define EXTRA_BASE (0x0F00)
233 /* Wrapper funcs for non-standard converters
// Adapts a per-character "cheap eight bit" to-bytes proc (kept in converter->_toBytes) to the
// buffer-based conversion signature. Characters are converted one at a time until the proc returns 0
// or 'length' characters have been handled.
// Returns the number of characters converted; *usedByteLen receives the same value.
235 static UInt32
__CFToBytesCheapEightBitWrapper(const void *converter
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
236 UInt32 processedCharLen
= 0;
// Characters to attempt: numChars, capped by maxByteLen when that is non-zero and smaller.
237 UInt32 length
= (maxByteLen
&& (maxByteLen
< numChars
) ? maxByteLen
: numChars
);
240 while (processedCharLen
< length
) {
// Stop at the first character the encoding-specific proc cannot convert.
241 if (!((CFStringEncodingCheapEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
[processedCharLen
], &byte
)) break;
// With maxByteLen == 0 nothing is stored.
243 if (maxByteLen
) bytes
[processedCharLen
] = byte
;
// One byte per converted character, so both counts are processedCharLen.
247 *usedByteLen
= processedCharLen
;
248 return processedCharLen
;
// Adapts a per-byte "cheap eight bit" to-Unicode proc (kept in converter->_toUnicode) to the
// buffer-based conversion signature. Bytes are converted one at a time until the proc returns 0 or
// 'length' bytes have been handled.
// Returns the number of bytes converted; *usedCharLen receives the same value.
251 static UInt32
__CFToUnicodeCheapEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
252 UInt32 processedByteLen
= 0;
// Bytes to attempt: numBytes, capped by maxCharLen when that is non-zero and smaller.
253 UInt32 length
= (maxCharLen
&& (maxCharLen
< numBytes
) ? maxCharLen
: numBytes
);
256 while (processedByteLen
< length
) {
// Stop at the first byte the encoding-specific proc cannot convert.
257 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
// With maxCharLen == 0 nothing is stored.
259 if (maxCharLen
) characters
[processedByteLen
] = character
;
// One character per converted byte, so both counts are processedByteLen.
263 *usedCharLen
= processedByteLen
;
264 return processedByteLen
;
// "Cheap eight bit" bytes-to-Unicode wrapper producing decomposed output: each byte is converted with
// converter->_toUnicode, and characters reported as decomposable are expanded with
// CFUniCharDecomposeCharacter before being written as UTF-16.
// Returns the number of input bytes consumed; *usedCharLen receives the output length counter.
267 static UInt32
__CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
268 UInt32 processedByteLen
= 0;
269 UInt32 theUsedCharLen
= 0;
// Holds the decomposition of one character as UTF-32 code points.
270 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
// Passed to CFUniCharIsDecomposableCharacter to select the HFS+ variant of the test.
273 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
// Continue while input remains and, when maxCharLen is non-zero, output room remains.
275 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// Stop at the first byte the encoding-specific proc cannot convert.
276 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
278 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
281 usedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far, since the loop below may return early.
282 *usedCharLen
= theUsedCharLen
;
284 for (idx
= 0;idx
< usedLen
;idx
++) {
285 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// A surrogate pair needs room for two code units.
286 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Subtract 0x10000, then emit the high surrogate (0xD800 + upper 10 bits) and the low
// surrogate (0xDC00 + lower 10 bits).
289 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
290 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
291 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
// BMP code point: needs room for one code unit.
294 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
296 *(characters
++) = charBuffer
[idx
];
// Not decomposable: store the character as converted (only when a buffer was supplied).
300 if (maxCharLen
) *(characters
++) = character
;
306 *usedCharLen
= theUsedCharLen
;
307 return processedByteLen
;
// Adapts a "standard eight bit" to-bytes proc (kept in converter->_toBytes) to the buffer-based
// signature. Each call to the proc receives the remaining characters and yields one byte plus the
// number of characters it consumed (usedLen).
// Returns the total number of characters consumed.
310 static UInt32
__CFToBytesStandardEightBitWrapper(const void *converter
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
311 UInt32 processedCharLen
= 0;
// Continue while input remains and, when maxByteLen is non-zero, output room remains.
317 while (numChars
&& (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
// A return of 0 from the proc means nothing could be converted: stop.
318 if (!(usedLen
= ((CFStringEncodingStandardEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
, numChars
, &byte
))) break;
// With maxByteLen == 0 nothing is stored.
320 if (maxByteLen
) bytes
[*usedByteLen
] = byte
;
// Step past the characters the proc consumed.
322 characters
+= usedLen
;
324 processedCharLen
+= usedLen
;
327 return processedCharLen
;
// Adapts a per-byte to-Unicode proc (kept in converter->_toUnicode) that may yield several characters
// for one byte: the characters are staged in charBuffer and then copied to the output.
// Returns the number of input bytes consumed; *usedCharLen accumulates the characters written.
330 static UInt32
__CFToUnicodeStandardEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
331 UInt32 processedByteLen
= 0;
// Staging buffer: a fixed 20 entries on MacOS 8 / Win32, otherwise a variable-length array sized by
// converter->maxLen.
332 #if defined(__MACOS8__) || defined(__WIN32__)
333 UniChar charBuffer
[20]; // Dynamic stack allocation is GNU specific
335 UniChar charBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
// Continue while input remains and, when maxCharLen is non-zero, output room remains.
341 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
// usedLen is the number of characters the proc placed in charBuffer; 0 stops the conversion.
// NOTE(review): the proc is cast to CFStringEncodingCheapEightBitToUnicodeProc although this is the
// StandardEightBit wrapper - confirm the intended typedef.
342 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
// Not enough room left for everything this byte produced.
347 if (*usedCharLen
+ usedLen
> maxCharLen
) break;
// Copy the staged characters to the output.
349 for (idx
= 0;idx
< usedLen
;idx
++) {
350 characters
[*usedCharLen
+ idx
] = charBuffer
[idx
];
353 *usedCharLen
+= usedLen
;
357 return processedByteLen
;
// Variant of the standard-eight-bit to-Unicode wrapper producing decomposed output: each byte is
// converted into charBuffer via converter->_toUnicode, and every resulting character reported as
// decomposable is expanded into decompBuffer with CFUniCharDecomposeCharacter.
// Returns the number of input bytes consumed; *usedCharLen receives the output length counter.
360 static UInt32
__CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
361 UInt32 processedByteLen
= 0;
// Staging buffer for the proc's output: a fixed 20 entries on MacOS 8 / Win32, otherwise a
// variable-length array sized by converter->maxLen.
362 #if defined(__MACOS8__) || defined(__WIN32__)
363 UniChar charBuffer
[20]; // Dynamic stack allocation is GNU specific
365 UniChar charBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
// Holds the decomposition of one staged character as UTF-32 code points.
367 UTF32Char decompBuffer
[MAX_DECOMPOSED_LENGTH
];
370 UInt32 idx
, decompIndex
;
// Passed to CFUniCharIsDecomposableCharacter to select the HFS+ variant of the test.
371 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
372 UInt32 theUsedCharLen
= 0;
// Continue while input remains and, when maxCharLen is non-zero, output room remains.
374 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// usedLen is the number of characters the proc placed in charBuffer; 0 stops the conversion.
375 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
377 for (idx
= 0;idx
< usedLen
;idx
++) {
378 if (CFUniCharIsDecomposableCharacter(charBuffer
[idx
], isHFSPlus
)) {
379 decompedLen
= CFUniCharDecomposeCharacter(charBuffer
[idx
], decompBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far, since the loop below may return early.
380 *usedCharLen
= theUsedCharLen
;
// NOTE(review): this loop tests decompBuffer[decompIndex], but every value written to the output
// below is taken from charBuffer[idx] (the undecomposed character). decompBuffer[decompIndex] looks
// intended - compare with __CFToCanonicalUnicodeCheapEightBitWrapper, which emits from the buffer it
// decomposed into.
382 for (decompIndex
= 0;decompIndex
< decompedLen
;decompIndex
++) {
383 if (decompBuffer
[decompIndex
] > 0xFFFF) { // Non-BMP
// A surrogate pair needs room for two code units.
384 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
387 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
388 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
389 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
// BMP code point: needs room for one code unit.
392 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
394 *(characters
++) = charBuffer
[idx
];
// Not decomposable: store the staged character as is (only when a buffer was supplied).
398 if (maxCharLen
) *(characters
++) = charBuffer
[idx
];
405 *usedCharLen
= theUsedCharLen
;
406 return processedByteLen
;
// Adapts a per-character "cheap multi byte" to-bytes proc (kept in converter->_toBytes) to the
// buffer-based signature: the proc turns one character into usedLen bytes in byteBuffer, which are
// then copied to the output.
// Returns the number of characters consumed; *usedByteLen accumulates the bytes written.
409 static UInt32
__CFToBytesCheapMultiByteWrapper(const void *converter
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
410 UInt32 processedCharLen
= 0;
// Staging buffer: a fixed 20 bytes on MacOS 8 / Win32, otherwise a variable-length array sized by
// converter->maxLen.
411 #if defined(__MACOS8__) || defined(__WIN32__)
412 uint8_t byteBuffer
[20]; // Dynamic stack allocation is GNU specific
414 uint8_t byteBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
// Continue while input remains and, when maxByteLen is non-zero, output room remains.
420 while ((processedCharLen
< numChars
) && (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
// usedLen is the number of bytes the proc produced for this character; 0 stops the conversion.
421 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
[processedCharLen
], byteBuffer
))) break;
// Not enough room left for all bytes of this character.
426 if (*usedByteLen
+ usedLen
> maxByteLen
) break;
// Copy the staged bytes to the output.
428 for (idx
= 0;idx
<usedLen
;idx
++) {
429 bytes
[*usedByteLen
+ idx
] = byteBuffer
[idx
];
433 *usedByteLen
+= usedLen
;
437 return processedCharLen
;
// Adapts a "cheap multi byte" to-Unicode proc (kept in converter->_toUnicode) to the buffer-based
// signature: each call receives the remaining bytes, yields one character, and reports how many
// bytes it consumed (usedLen).
// Returns the total number of bytes consumed.
440 static UInt32
__CFToUnicodeCheapMultiByteWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
441 UInt32 processedByteLen
= 0;
// Continue while input remains and, when maxCharLen is non-zero, output room remains.
447 while (numBytes
&& (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
// A return of 0 from the proc means nothing could be converted: stop.
448 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
// With maxCharLen == 0 nothing is stored.
450 if (maxCharLen
) *(characters
++) = character
;
452 processedByteLen
+= usedLen
;
457 return processedByteLen
;
// "Cheap multi byte" bytes-to-Unicode wrapper producing decomposed output: each call to
// converter->_toUnicode yields one character, and characters reported as decomposable are expanded
// with CFUniCharDecomposeCharacter before being written as UTF-16.
// Returns the number of input bytes consumed; *usedCharLen receives the output length counter.
460 static UInt32
__CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
461 UInt32 processedByteLen
= 0;
// Holds the decomposition of one character as UTF-32 code points.
462 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
465 UInt32 decomposedLen
;
466 UInt32 theUsedCharLen
= 0;
// Passed to CFUniCharIsDecomposableCharacter to select the HFS+ variant of the test.
467 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
// Continue while input remains and, when maxCharLen is non-zero, output room remains.
469 while (numBytes
&& (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// usedLen is the number of bytes the proc consumed for this character; 0 stops the conversion.
470 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
472 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
475 decomposedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far, since the loop below may return early.
476 *usedCharLen
= theUsedCharLen
;
478 for (idx
= 0;idx
< decomposedLen
;idx
++) {
479 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// A surrogate pair needs room for two code units.
480 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Subtract 0x10000, then emit the high surrogate (0xD800 + upper 10 bits) and the low
// surrogate (0xDC00 + lower 10 bits).
483 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
484 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
485 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
// BMP code point: needs room for one code unit.
488 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
490 *(characters
++) = charBuffer
[idx
];
// Not decomposable: store the character as converted (only when a buffer was supplied).
494 if (maxCharLen
) *(characters
++) = character
;
498 processedByteLen
+= usedLen
;
502 *usedCharLen
= theUsedCharLen
;
503 return processedByteLen
;
// Registry entry for kCFStringEncodingASCII: a display name, a NULL-terminated list of charset names,
// pointer slots that start out NULL, and a trailing script value.
// NOTE(review): presumably these populate the encodingName / ianaNames / scriptCode fields read by the
// accessor functions in this file - confirm against the _CFConverterEntry declaration.
508 static _CFConverterEntry __CFConverterEntryASCII
= {
509 kCFStringEncodingASCII
, NULL
,
510 "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL
}, NULL
, NULL
, NULL
, NULL
,
511 kCFStringEncodingMacRoman
// We use string encoding's script range here
// Registry entry for kCFStringEncodingISOLatin1: a display name, a NULL-terminated list of charset
// names, pointer slots that start out NULL, and a trailing script value (MacRoman).
514 static _CFConverterEntry __CFConverterEntryISOLatin1
= {
515 kCFStringEncodingISOLatin1
, NULL
,
516 "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL
}, NULL
, NULL
, NULL
, NULL
,
517 kCFStringEncodingMacRoman
// We use string encoding's script range here
// Registry entry for kCFStringEncodingMacRoman: a display name, a NULL-terminated list of charset
// names, pointer slots that start out NULL, and a trailing script value (MacRoman).
520 static _CFConverterEntry __CFConverterEntryMacRoman
= {
521 kCFStringEncodingMacRoman
, NULL
,
522 "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL
}, NULL
, NULL
, NULL
, NULL
,
523 kCFStringEncodingMacRoman
// We use string encoding's script range here
// Registry entry for kCFStringEncodingWindowsLatin1: a display name, a NULL-terminated list of
// charset names, pointer slots that start out NULL, and a trailing script value (MacRoman).
526 static _CFConverterEntry __CFConverterEntryWinLatin1
= {
527 kCFStringEncodingWindowsLatin1
, NULL
,
528 "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL
}, NULL
, NULL
, NULL
, NULL
,
529 kCFStringEncodingMacRoman
// We use string encoding's script range here
// Registry entry for kCFStringEncodingNextStepLatin: a display name, a single charset name (the
// remaining name slots are NULL), pointer slots that start out NULL, and a trailing script value
// (MacRoman).
532 static _CFConverterEntry __CFConverterEntryNextStepLatin
= {
533 kCFStringEncodingNextStepLatin
, NULL
,
534 "Western (NextStep)", {"x-nextstep", NULL
, NULL
, NULL
}, NULL
, NULL
, NULL
, NULL
,
535 kCFStringEncodingMacRoman
// We use string encoding's script range here
// Registry entry for kCFStringEncodingUTF8: a display name, two charset names (the remaining name
// slots are NULL), pointer slots that start out NULL, and a trailing script value, which here is
// kCFStringEncodingUnicode rather than MacRoman.
538 static _CFConverterEntry __CFConverterEntryUTF8
= {
539 kCFStringEncodingUTF8
, NULL
,
540 "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL
, NULL
}, NULL
, NULL
, NULL
, NULL
,
541 kCFStringEncodingUnicode
// We use string encoding's script range here
// Maps an encoding constant to the address of its static registry entry.
544 CF_INLINE _CFConverterEntry
*__CFStringEncodingConverterGetEntry(UInt32 encoding
) {
// kCFStringEncodingInvalidId shares the ASCII entry.
546 case kCFStringEncodingInvalidId
:
547 case kCFStringEncodingASCII
:
548 return &__CFConverterEntryASCII
;
550 case kCFStringEncodingISOLatin1
:
551 return &__CFConverterEntryISOLatin1
;
553 case kCFStringEncodingMacRoman
:
554 return &__CFConverterEntryMacRoman
;
556 case kCFStringEncodingWindowsLatin1
:
557 return &__CFConverterEntryWinLatin1
;
559 case kCFStringEncodingNextStepLatin
:
560 return &__CFConverterEntryNextStepLatin
;
562 case kCFStringEncodingUTF8
:
563 return &__CFConverterEntryUTF8
;
// Takes the next _CFEncodingConverter record from a chunk-allocated pool and fills it in from a
// CFStringEncodingConverter definition, installing the wrapper functions that match
// definition->encodingClass.
571 CF_INLINE _CFEncodingConverter
*__CFEncodingConverterFromDefinition(const CFStringEncodingConverter
*definition
) {
// Number of converter records obtained per CFAllocatorAllocate call.
572 #define NUM_OF_ENTRIES_CYCLE (10)
573 static CFSpinLock_t _indexLock
= 0;
574 static UInt32 _currentIndex
= 0;
575 static UInt32 _allocatedSize
= 0;
576 static _CFEncodingConverter
*_allocatedEntries
= NULL
;
577 _CFEncodingConverter
*converter
;
// The pool bookkeeping below runs under _indexLock.
580 __CFSpinLock(&_indexLock
);
// No room for another record in the current chunk: drop the chunk pointer so a new chunk is
// allocated below.
// NOTE(review): the old chunk is not freed here; presumably intentional because records handed out
// earlier still point into it - confirm.
581 if ((_currentIndex
+ 1) >= _allocatedSize
) {
584 _allocatedEntries
= NULL
;
586 if (_allocatedEntries
== NULL
) { // Not allocated yet
// NOTE(review): the CFAllocatorAllocate result is used without a visible NULL check - confirm.
587 _allocatedEntries
= (_CFEncodingConverter
*)CFAllocatorAllocate(NULL
, sizeof(_CFEncodingConverter
) * NUM_OF_ENTRIES_CYCLE
, 0);
588 _allocatedSize
= NUM_OF_ENTRIES_CYCLE
;
589 converter
= &(_allocatedEntries
[_currentIndex
]);
// Existing chunk: advance to the next record.
591 converter
= &(_allocatedEntries
[++_currentIndex
]);
593 __CFSpinUnlock(&_indexLock
);
595 switch (definition
->encodingClass
) {
// Standard converters are called as supplied; toCanonicalUnicode also uses the definition's
// toUnicode. Leaving _toBytes/_toUnicode NULL makes TO_BYTE/TO_UNICODE take their direct-call path.
596 case kCFStringEncodingConverterStandard
:
597 converter
->toBytes
= definition
->toBytes
;
598 converter
->toUnicode
= definition
->toUnicode
;
599 converter
->toCanonicalUnicode
= definition
->toUnicode
;
600 converter
->_toBytes
= NULL
;
601 converter
->_toUnicode
= NULL
;
602 converter
->maxLen
= 2;
// Cheap eight bit: public slots get the wrappers, the definition's procs go into _toBytes/_toUnicode.
605 case kCFStringEncodingConverterCheapEightBit
:
606 converter
->toBytes
= __CFToBytesCheapEightBitWrapper
;
607 converter
->toUnicode
= __CFToUnicodeCheapEightBitWrapper
;
608 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapEightBitWrapper
;
609 converter
->_toBytes
= definition
->toBytes
;
610 converter
->_toUnicode
= definition
->toUnicode
;
611 converter
->maxLen
= 1;
// Standard eight bit: maxLen comes from maxDecomposedCharLen (the wrappers size charBuffer with it).
614 case kCFStringEncodingConverterStandardEightBit
:
615 converter
->toBytes
= __CFToBytesStandardEightBitWrapper
;
616 converter
->toUnicode
= __CFToUnicodeStandardEightBitWrapper
;
617 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeStandardEightBitWrapper
;
618 converter
->_toBytes
= definition
->toBytes
;
619 converter
->_toUnicode
= definition
->toUnicode
;
620 converter
->maxLen
= definition
->maxDecomposedCharLen
;
// Cheap multi byte: maxLen comes from maxBytesPerChar (the to-bytes wrapper sizes byteBuffer with it).
623 case kCFStringEncodingConverterCheapMultiByte
:
624 converter
->toBytes
= __CFToBytesCheapMultiByteWrapper
;
625 converter
->toUnicode
= __CFToUnicodeCheapMultiByteWrapper
;
626 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapMultiByteWrapper
;
627 converter
->_toBytes
= definition
->toBytes
;
628 converter
->_toUnicode
= definition
->toUnicode
;
629 converter
->maxLen
= definition
->maxBytesPerChar
;
// Platform specific: every function slot is cleared and maxLen is 0.
632 case kCFStringEncodingConverterPlatformSpecific
:
633 converter
->toBytes
= NULL
;
634 converter
->toUnicode
= NULL
;
635 converter
->toCanonicalUnicode
= NULL
;
636 converter
->_toBytes
= NULL
;
637 converter
->_toUnicode
= NULL
;
638 converter
->maxLen
= 0;
639 converter
->toBytesLen
= NULL
;
640 converter
->toUnicodeLen
= NULL
;
641 converter
->toBytesFallback
= NULL
;
642 converter
->toUnicodeFallback
= NULL
;
643 converter
->toBytesPrecompose
= NULL
;
644 converter
->isValidCombiningChar
= NULL
;
647 default: // Shouldn't be here
// When the definition has no length proc, the per-unit maximum is stored in the pointer slot itself
// (integer cast to a function pointer); the length query functions check for values below 0xFFFF
// before calling through these slots.
651 converter
->toBytesLen
= (definition
->toBytesLen
? definition
->toBytesLen
: (CFStringEncodingToBytesLenProc
)(UInt32
)definition
->maxBytesPerChar
);
652 converter
->toUnicodeLen
= (definition
->toUnicodeLen
? definition
->toUnicodeLen
: (CFStringEncodingToUnicodeLenProc
)(UInt32
)definition
->maxDecomposedCharLen
);
// Fallbacks default to the file-local implementations when the definition has none.
653 converter
->toBytesFallback
= (definition
->toBytesFallback
? definition
->toBytesFallback
: __CFDefaultToBytesFallbackProc
);
654 converter
->toUnicodeFallback
= (definition
->toUnicodeFallback
? definition
->toUnicodeFallback
: __CFDefaultToUnicodeFallbackProc
);
655 converter
->toBytesPrecompose
= (definition
->toBytesPrecompose
? definition
->toBytesPrecompose
: NULL
);
656 converter
->isValidCombiningChar
= (definition
->isValidCombiningChar
? definition
->isValidCombiningChar
: NULL
);
// Maps a registry entry to the address of the converter definition for its encoding.
// Returns NULL when entry is NULL.
661 CF_INLINE
const CFStringEncodingConverter
*__CFStringEncodingConverterGetDefinition(_CFConverterEntry
*entry
) {
662 if (!entry
) return NULL
;
664 switch (entry
->encoding
) {
665 case kCFStringEncodingASCII
:
666 return &__CFConverterASCII
;
668 case kCFStringEncodingISOLatin1
:
669 return &__CFConverterISOLatin1
;
671 case kCFStringEncodingMacRoman
:
672 return &__CFConverterMacRoman
;
674 case kCFStringEncodingWindowsLatin1
:
675 return &__CFConverterWinLatin1
;
677 case kCFStringEncodingNextStepLatin
:
678 return &__CFConverterNextStepLatin
;
680 case kCFStringEncodingUTF8
:
681 return &__CFConverterUTF8
;
// Returns the converter for an encoding, building it from its definition the first time it is
// requested and keeping it in the registry entry. Returns NULL when the encoding has no entry.
688 static const _CFEncodingConverter
*__CFGetConverter(UInt32 encoding
) {
689 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
691 if (!entry
) return NULL
;
// First request for this encoding: create the converter and remember the definition's own fallbacks
// in the entry.
693 if (!entry
->converter
) {
694 const CFStringEncodingConverter
*definition
= __CFStringEncodingConverterGetDefinition(entry
);
697 entry
->converter
= __CFEncodingConverterFromDefinition(definition
);
698 entry
->toBytesFallback
= definition
->toBytesFallback
;
699 entry
->toUnicodeFallback
= definition
->toUnicodeFallback
;
703 return (_CFEncodingConverter
*)entry
->converter
;
// Converts UniChars to bytes in the given encoding.
//   encoding, flags      - target encoding and kCFStringEncoding* option bits
//   characters, numChars - input
//   usedCharLen          - optional; receives the number of characters consumed
//   bytes, maxByteLen    - output buffer and capacity; with maxByteLen == 0 the loop applies no
//                          output limit and the stores into 'bytes' shown here are skipped
//   usedByteLen          - optional; receives the number of bytes produced
// The status values assigned here are kCFStringEncodingConversionSuccess,
// kCFStringEncodingInsufficientOutputBufferLength, kCFStringEncodingInvalidInputStream and
// kCFStringEncodingConverterUnavailable.
708 UInt32
CFStringEncodingUnicodeToBytes(UInt32 encoding
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, UInt32
*usedCharLen
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
// UTF-8 is handled in this branch without going through __CFGetConverter.
709 if (encoding
== kCFStringEncodingUTF8
) {
710 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
711 uint32_t convertedCharLen
;
// Canonical output requested: decompose straight into UTF-8.
715 if ((flags
& kCFStringEncodingUseCanonical
) || (flags
& kCFStringEncodingUseHFSPlusCanonical
)) {
716 (void)CFUniCharDecompose(characters
, numChars
, &convertedCharLen
, (void *)bytes
, maxByteLen
, &usedLen
, true, kCFUniCharUTF8Format
, (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false));
// Otherwise use the UTF-8 converter's toBytes routine, kept in the function-local static __CFToUTF8.
719 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
720 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
722 convertedCharLen
= __CFToUTF8(0, characters
, numChars
, bytes
, maxByteLen
, (UInt32
*)&usedLen
);
// Both length out-parameters are optional.
724 if (usedCharLen
) *usedCharLen
= convertedCharLen
;
725 if (usedByteLen
) *usedByteLen
= usedLen
;
// All input consumed -> success; a non-empty buffer that is exactly full -> insufficient buffer;
// anything else -> invalid input.
727 if (convertedCharLen
== numChars
) {
728 return kCFStringEncodingConversionSuccess
;
729 } else if (maxByteLen
&& (maxByteLen
== usedLen
)) {
730 return kCFStringEncodingInsufficientOutputBufferLength
;
732 return kCFStringEncodingInvalidInputStream
;
// Every other encoding goes through its registered converter.
735 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
737 UInt32 localUsedByteLen
;
738 UInt32 theUsedByteLen
= 0;
739 UInt32 theResult
= kCFStringEncodingConversionSuccess
;
740 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose
= NULL
;
741 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar
= NULL
;
743 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// Pick the combining-character helpers according to the flags.
745 if (flags
& kCFStringEncodingSubstituteCombinings
) {
746 if (!(flags
& kCFStringEncodingAllowLossyConversion
)) isValidCombiningChar
= converter
->isValidCombiningChar
;
748 isValidCombiningChar
= converter
->isValidCombiningChar
;
// Unless combinings are to be ignored, enable precomposition and tell the converter to compose.
749 if (!(flags
& kCFStringEncodingIgnoreCombinings
)) {
750 toBytesPrecompose
= converter
->toBytesPrecompose
;
751 flags
|= kCFStringEncodingComposeCombinings
;
// Convert in runs: TO_BYTE consumes what it can, and when it stops before the end of the input the
// code below deals with the character it stopped at.
756 while ((usedLen
< numChars
) && (!maxByteLen
|| (theUsedByteLen
< maxByteLen
))) {
757 if ((usedLen
+= TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) < numChars
) {
// Stopped on a combining character that follows at least one converted character.
760 if (isValidCombiningChar
&& (usedLen
> 0) && isValidCombiningChar(characters
[usedLen
])) {
761 if (toBytesPrecompose
) {
762 UInt32 localUsedLen
= usedLen
;
// Step back over the combining characters to the character they attach to.
// NOTE(review): this backward scan has no visible lower bound on usedLen - confirm it cannot pass
// the start of 'characters'.
764 while (isValidCombiningChar(characters
[--usedLen
]));
765 theUsedByteLen
+= localUsedByteLen
;
// For multi-byte encodings, measure the bytes already produced for the characters stepped back over
// (length-only TO_BYTE call) and take them back out of the running total.
766 if (converter
->maxLen
> 1) {
767 TO_BYTE(converter
, flags
, characters
+ usedLen
, localUsedLen
- usedLen
, NULL
, 0, &localUsedByteLen
);
768 theUsedByteLen
-= localUsedByteLen
;
// Let the encoding's precompose proc convert the base + combining sequence.
772 if ((localUsedLen
= toBytesPrecompose(flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) > 0) {
773 usedLen
+= localUsedLen
;
774 if ((usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
])) { // There is a non-base char not combined remaining
775 theUsedByteLen
+= localUsedByteLen
;
776 theResult
= kCFStringEncodingInvalidInputStream
;
// Precompose produced nothing and lossy conversion is allowed.
779 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
780 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
// Skip the following combining characters.
// NOTE(review): unlike the scan in the kCFStringEncodingIgnoreCombinings branch below, this loop does
// not test usedLen against numChars before reading characters[usedLen] - confirm it cannot read past
// the input.
783 while (isValidCombiningChar(characters
[++usedLen
]));
784 localUsedByteLen
= 1;
785 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
788 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
791 theResult
= kCFStringEncodingInvalidInputStream
;
// Output is exactly full, or a length-only retry shows the next characters are convertible.
794 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
795 theUsedByteLen
+= localUsedByteLen
;
796 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Combining characters are to be dropped: skip this one and any that follow.
798 } else if (flags
& kCFStringEncodingIgnoreCombinings
) {
799 while ((++usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
]));
801 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
803 theUsedByteLen
+= localUsedByteLen
;
806 localUsedByteLen
= 1;
807 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
809 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
// Not a combining-character stop: output is exactly full, or a length-only retry succeeds.
812 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
813 theUsedByteLen
+= localUsedByteLen
;
// Lossy conversion with no lossy byte set in flags: use length-only fallback calls to step over
// characters for which the fallback reports zero bytes.
815 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
818 localUsedByteLen
= 0;
819 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
821 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Unconvertible character with lossy conversion allowed.
823 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
824 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
826 theUsedByteLen
+= localUsedByteLen
;
829 localUsedByteLen
= 1;
830 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
832 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
// Unconvertible character and no lossy conversion: report invalid input.
835 theUsedByteLen
+= localUsedByteLen
;
836 theResult
= kCFStringEncodingInvalidInputStream
;
840 theUsedByteLen
+= localUsedByteLen
;
// Input left over with a real buffer and no error recorded so far.
843 if (usedLen
< numChars
&& maxByteLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
844 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
847 localUsedByteLen
= 0;
848 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
850 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Both length out-parameters are optional.
852 if (usedByteLen
) *usedByteLen
= theUsedByteLen
;
853 if (usedCharLen
) *usedCharLen
= usedLen
;
// Converts bytes in the given encoding to UniChars.
//   encoding, flags        - source encoding and kCFStringEncoding* option bits
//   bytes, numBytes        - input
//   usedByteLen            - optional; receives the number of bytes consumed
//   characters, maxCharLen - output buffer and capacity; with maxCharLen == 0 the loop applies no
//                            output limit
//   usedCharLen            - optional; receives the number of characters produced
// The status values assigned here are kCFStringEncodingConversionSuccess,
// kCFStringEncodingInsufficientOutputBufferLength, kCFStringEncodingInvalidInputStream and
// kCFStringEncodingConverterUnavailable.
859 UInt32
CFStringEncodingBytesToUnicode(UInt32 encoding
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UInt32
*usedByteLen
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
860 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
862 UInt32 theUsedCharLen
= 0;
863 UInt32 localUsedCharLen
;
864 UInt32 theResult
= kCFStringEncodingConversionSuccess
;
866 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// Convert in runs: TO_UNICODE consumes what it can, and when it stops before the end of the input
// the code below decides why.
869 while ((usedLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
870 if ((usedLen
+= TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
)) < numBytes
) {
871 UInt32 tempUsedCharLen
;
// Output is exactly full, or (canonical modes only) a length-only retry shows the next bytes are
// convertible, meaning the stop was caused by lack of room.
873 if (maxCharLen
&& ((maxCharLen
== theUsedCharLen
+ localUsedCharLen
) || ((flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
)) && TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, NULL
, 0, &tempUsedCharLen
)))) { // buffer was filled up
874 theUsedCharLen
+= localUsedCharLen
;
875 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Unconvertible bytes with lossy conversion allowed: let the fallback produce a substitute.
877 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
878 theUsedCharLen
+= localUsedCharLen
;
879 usedLen
+= TO_UNICODE_FALLBACK(converter
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
);
// Unconvertible bytes and no lossy conversion: report invalid input.
881 theUsedCharLen
+= localUsedCharLen
;
882 theResult
= kCFStringEncodingInvalidInputStream
;
886 theUsedCharLen
+= localUsedCharLen
;
// Input left over with a real buffer and no error recorded so far: the buffer was too small.
889 if (usedLen
< numBytes
&& maxCharLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
890 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Both length out-parameters are optional.
892 if (usedCharLen
) *usedCharLen
= theUsedCharLen
;
893 if (usedByteLen
) *usedByteLen
= usedLen
;
// Returns true when CFStringEncodingGetConverter yields a converter for the encoding, false otherwise.
898 __private_extern__ Boolean
CFStringEncodingIsValidEncoding(UInt32 encoding
) {
899 return (CFStringEncodingGetConverter(encoding
) ? true : false);
// Returns the encodingName string of the registry entry for the encoding, when such an entry exists.
902 __private_extern__
const char *CFStringEncodingName(UInt32 encoding
) {
903 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
904 if (entry
) return entry
->encodingName
;
908 __private_extern__
const char **CFStringEncodingCanonicalCharsetNames(UInt32 encoding
) {
909 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
910 if (entry
) return entry
->ianaNames
;
914 __private_extern__ UInt32
CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding
) {
915 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
917 return (entry
? entry
->scriptCode
: ((encoding
& 0x0FFF) == kCFStringEncodingUnicode
? kCFStringEncodingUnicode
: (encoding
< 0xFF ? encoding
: kCFStringEncodingInvalidId
)));
920 __private_extern__ UInt32
CFStringEncodingCharLengthForBytes(UInt32 encoding
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
) {
921 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
924 UInt32 switchVal
= (UInt32
)(converter
->toUnicodeLen
);
926 if (switchVal
< 0xFFFF)
927 return switchVal
* numBytes
;
929 return converter
->toUnicodeLen(flags
, bytes
, numBytes
);
935 __private_extern__ UInt32
CFStringEncodingByteLengthForCharacters(UInt32 encoding
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
) {
936 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
939 UInt32 switchVal
= (UInt32
)(converter
->toBytesLen
);
941 if (switchVal
< 0xFFFF)
942 return switchVal
* numChars
;
944 return converter
->toBytesLen(flags
, characters
, numChars
);
950 __private_extern__
void CFStringEncodingRegisterFallbackProcedures(UInt32 encoding
, CFStringEncodingToBytesFallbackProc toBytes
, CFStringEncodingToUnicodeFallbackProc toUnicode
) {
951 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
953 if (entry
&& __CFGetConverter(encoding
)) {
954 ((_CFEncodingConverter
*)entry
->converter
)->toBytesFallback
= (toBytes
? toBytes
: entry
->toBytesFallback
);
955 ((_CFEncodingConverter
*)entry
->converter
)->toUnicodeFallback
= (toUnicode
? toUnicode
: entry
->toUnicodeFallback
);
959 __private_extern__
const CFStringEncodingConverter
*CFStringEncodingGetConverter(UInt32 encoding
) {
960 return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding
));
963 static const UInt32 __CFBuiltinEncodings
[] = {
964 kCFStringEncodingMacRoman
,
965 kCFStringEncodingWindowsLatin1
,
966 kCFStringEncodingISOLatin1
,
967 kCFStringEncodingNextStepLatin
,
968 kCFStringEncodingASCII
,
969 kCFStringEncodingUTF8
,
970 /* These seven are available only in CFString-level */
971 kCFStringEncodingNonLossyASCII
,
973 kCFStringEncodingUTF16
,
974 kCFStringEncodingUTF16BE
,
975 kCFStringEncodingUTF16LE
,
977 kCFStringEncodingUTF32
,
978 kCFStringEncodingUTF32BE
,
979 kCFStringEncodingUTF32LE
,
981 kCFStringEncodingInvalidId
,
985 __private_extern__
const UInt32
*CFStringEncodingListOfAvailableEncodings(void) {
986 return __CFBuiltinEncodings
;