2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* CFStringEncodingConverter.c
26 Copyright 1998-2002, Apple, Inc. All rights reserved.
27 Responsibility: Aki Inoue
30 #include "CFInternal.h"
31 #include <CoreFoundation/CFArray.h>
32 #include <CoreFoundation/CFDictionary.h>
33 #include "CFUniChar.h"
34 #include "CFUtilities.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFStringEncodingConverterExt.h"
37 #include "CFStringEncodingConverterPriv.h"
39 #if !defined(__MACOS8__)
42 #else // Mach, HP-UX, Solaris
/* Runs a Unicode-to-bytes conversion through `conv`.  When conv->_toBytes is
 * set, conv->toBytes is called with the converter itself as first argument;
 * otherwise conv->toBytes is cast to CFStringEncodingToBytesProc and called
 * without it.
 * NOTE(review): `conv` and `flags` are expanded without parentheses, so only
 * plain identifiers are safe as arguments. */
50 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->_toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->toBytes)(flags,chars,numChars,bytes,max,used))
/* Bytes-to-Unicode counterpart of TO_BYTE.  When conv->_toUnicode is set, the
 * canonical flags (kCFStringEncodingUseCanonical or
 * kCFStringEncodingUseHFSPlusCanonical) select conv->toCanonicalUnicode over
 * conv->toUnicode; otherwise conv->toUnicode is cast to
 * CFStringEncodingToUnicodeProc and called without the converter. */
51 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->_toUnicode ? (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->toUnicode)(flags,bytes,numBytes,chars,max,used))
/* Named code points: the two Unicode separators and the ASCII newline. */
53 #define LineSeparator 0x2028
54 #define ParagraphSeparator 0x2029
55 #define ASCIINewLine 0x0a
/* UTF-16 surrogate code unit ranges (high: D800-DBFF, low: DC00-DFFF). */
56 #define kSurrogateHighStart 0xD800
57 #define kSurrogateHighEnd 0xDBFF
58 #define kSurrogateLowStart 0xDC00
59 #define kSurrogateLowEnd 0xDFFF
61 /* Mapping 160..255 (U+00A0..U+00FF) to lossy ASCII
/* One row per character from NO-BREAK SPACE (0xA0) through LATIN SMALL LETTER
 * Y WITH DIAERESIS (0xFF), in code point order (96 rows).  Each row holds up
 * to four replacement bytes; unused trailing slots are 0 and a row of all
 * zeros carries no replacement bytes. */
64 unsigned char chars
[4];
65 } _toLossyASCIITable
[] = {
66 {{' ', 0, 0, 0}}, // NO-BREAK SPACE
67 {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
68 {{'c', 0, 0, 0}}, // CENT SIGN
69 {{'L', 0, 0, 0}}, // POUND SIGN
70 {{'$', 0, 0, 0}}, // CURRENCY SIGN
71 {{'Y', 0, 0, 0}}, // YEN SIGN
72 {{'|', 0, 0, 0}}, // BROKEN BAR
73 {{0, 0, 0, 0}}, // SECTION SIGN
74 {{0, 0, 0, 0}}, // DIAERESIS
75 {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
76 {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
77 {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
78 {{0, 0, 0, 0}}, // NOT SIGN
79 {{'-', 0, 0, 0}}, // SOFT HYPHEN
80 {{'(', 'R', ')', 0}}, // REGISTERED SIGN
81 {{0, 0, 0, 0}}, // MACRON
82 {{0, 0, 0, 0}}, // DEGREE SIGN
83 {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
84 {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
85 {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
86 {{0, 0, 0, 0}}, // ACUTE ACCENT
87 {{0, 0, 0, 0}}, // MICRO SIGN
88 {{0, 0, 0, 0}}, // PILCROW SIGN
89 {{0, 0, 0, 0}}, // MIDDLE DOT
90 {{0, 0, 0, 0}}, // CEDILLA
91 {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
92 {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
93 {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
94 {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
95 {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
96 {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
97 {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
98 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
99 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
100 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
101 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
102 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
103 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
104 {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
105 {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
106 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
107 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
108 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
109 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
110 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
111 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
112 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
113 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
114 {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
115 {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
116 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
117 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
118 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
119 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
120 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
121 {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
122 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
123 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
124 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
125 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
126 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
127 {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
// NOTE(review): a capital letter mapped to lower-case "th", whereas CAPITAL ETH is mapped to "TH" - confirm the intended case.
128 {{'t', 'h', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
129 {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
130 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
131 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
132 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
133 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
134 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
135 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
136 {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
137 {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
138 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
139 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
140 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
141 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
142 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
143 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
144 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
145 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
// NOTE(review): a small letter mapped to upper-case "TH", whereas SMALL THORN is mapped to "th" - confirm the intended case.
146 {{'T', 'H', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
147 {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
148 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
149 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
150 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
151 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
152 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
153 {{'/', 0, 0, 0}}, // DIVISION SIGN
154 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
155 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
156 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
157 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
158 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
159 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
160 {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
161 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
/* Copies the lossy-ASCII replacement bytes for `character` from
 * _toLossyASCIITable into `bytes`.
 *
 * character   - indexed as (character - 0xA0) with a 4-byte row stride, so it
 *               must lie in 0xA0..0xFF for the lookup to stay inside the table.
 * bytes       - destination; written only when maxByteLen is non-zero.
 * maxByteLen  - when non-zero and below 4 it caps the number of bytes
 *               examined; otherwise up to 4 bytes are examined.
 *
 * NOTE(review): this listing omits lines inside the loop and the return
 * statement, so the stop condition on a 0 byte and the returned count cannot
 * be confirmed from the lines shown here. */
164 CF_INLINE UInt32
__CFToASCIILatin1Fallback(UniChar character
, UInt8
*bytes
, UInt32 maxByteLen
) {
// NOTE(review): declared `const char *` but initialised from a `const unsigned char *` expression (pointer signedness mismatch).
165 const char *losChars
= (const unsigned char*)_toLossyASCIITable
+ (character
- 0xA0) * sizeof(unsigned char[4]);
166 unsigned int numBytes
= 0;
167 int idx
, max
= (maxByteLen
&& (maxByteLen
< 4) ? maxByteLen
: 4);
169 for (idx
= 0;idx
< max
;idx
++) {
171 if (maxByteLen
) bytes
[idx
] = losChars
[idx
];
/* Default lossy fallback for a character the converter could not encode.
 * Looks only at *characters (plus the following unit for surrogates) and picks
 * a replacement by category, in this order:
 *   - below 0xA0: the value minus 0x80 (see the original comment below);
 *   - 0xA0..0xFF: the table-driven Latin-1 fallback, whose result is stored
 *     in *usedByteLen;
 *   - surrogate code units: '?', consuming 2 units for a pair, else 1;
 *   - whitespace: ' ';
 *   - whitespace-or-newline (not matched by the previous test): ASCIINewLine;
 *   - non-letters: body not visible in this listing;
 *   - decomposable letters: the first decomposed code point if it is ASCII,
 *     otherwise a recursive call on that first code point;
 *   - anything else: '?'.
 * Output bytes are written only when maxByteLen is non-zero.
 *
 * NOTE(review): several lines (the *usedByteLen assignments and return
 * statements of most branches) are missing from this listing. */
181 static UInt32
__CFDefaultToBytesFallbackProc(const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
// NOTE(review): presumably never reached with a value below 0x80; the subtraction would wrap for such input - confirm against callers.
182 if (*characters
< 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
183 if (maxByteLen
) *bytes
= (UInt8
)(*characters
- 0x80);
186 } else if (*characters
< 0x100) {
187 *usedByteLen
= __CFToASCIILatin1Fallback(*characters
, bytes
, maxByteLen
);
189 } else if (*characters
>= kSurrogateHighStart
&& *characters
<= kSurrogateLowEnd
) {
190 if (maxByteLen
) *bytes
= '?';
// NOTE(review): "<= kSurrogateLowStart" also accepts 0xDC00, which is itself a low surrogate, as the leading unit of a pair; "<= kSurrogateHighEnd" may have been intended.
192 return (numChars
> 1 && *characters
<= kSurrogateLowStart
&& *(characters
+ 1) >= kSurrogateLowStart
&& *(characters
+ 1) <= kSurrogateLowEnd
? 2 : 1);
193 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceCharacterSet
)) {
194 if (maxByteLen
) *bytes
= ' ';
197 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceAndNewlineCharacterSet
)) {
198 if (maxByteLen
) *bytes
= ASCIINewLine
;
201 } else if (!CFUniCharIsMemberOf(*characters
, kCFUniCharLetterCharacterSet
)) {
204 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharDecomposableCharacterSet
)) {
205 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
// The decomposition length is deliberately discarded; only the first code point is used.
207 (void)CFUniCharDecomposeCharacter(*characters
, decomposed
, MAX_DECOMPOSED_LENGTH
);
208 if (*decomposed
< 0x80) {
209 if (maxByteLen
) *bytes
= (UInt8
)(*decomposed
);
// NOTE(review): narrows a UTF32Char to UTF16Char; a non-BMP first code point would be truncated.
213 UTF16Char theChar
= *decomposed
;
215 return __CFDefaultToBytesFallbackProc(&theChar
, 1, bytes
, maxByteLen
, usedByteLen
);
218 if (maxByteLen
) *bytes
= '?';
/* Default fallback for bytes the converter could not decode: stores '?' in
 * *characters when maxCharLen is non-zero.
 * NOTE(review): the lines that set *usedCharLen and return the number of
 * consumed bytes are missing from this listing. */
224 static UInt32
__CFDefaultToUnicodeFallbackProc(const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
225 if (maxCharLen
) *characters
= (UniChar
)'?';
/* Invoke the converter's to-bytes fallback; unlike TO_BYTE, the converter is
 * not passed on to the callee. */
230 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
/* Invoke the converter's to-Unicode fallback. */
231 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
/* NOTE(review): no use of EXTRA_BASE appears in the lines visible here. */
233 #define EXTRA_BASE (0x0F00)
235 /* Wrapper funcs for non-standard converters
/* Adapts a one-character-to-one-byte proc (stored in converter->_toBytes) to
 * the buffer-based to-bytes interface.
 *
 * Processes at most numChars characters, or at most maxByteLen when that is
 * non-zero and smaller.  Stops at the first character the proc rejects.
 * Bytes are stored only when maxByteLen is non-zero.  Because every character
 * yields exactly one byte, *usedByteLen and the return value are both the
 * number of characters processed.
 *
 * NOTE(review): the declaration of `byte` and the loop increment are missing
 * from this listing. */
237 static UInt32
__CFToBytesCheapEightBitWrapper(const void *converter
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
238 UInt32 processedCharLen
= 0;
239 UInt32 length
= (maxByteLen
&& (maxByteLen
< numChars
) ? maxByteLen
: numChars
);
242 while (processedCharLen
< length
) {
243 if (!((CFStringEncodingCheapEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
[processedCharLen
], &byte
)) break;
245 if (maxByteLen
) bytes
[processedCharLen
] = byte
;
249 *usedByteLen
= processedCharLen
;
250 return processedCharLen
;
/* Adapts a one-byte-to-one-character proc (stored in converter->_toUnicode)
 * to the buffer-based to-Unicode interface.
 *
 * Processes at most numBytes bytes, or at most maxCharLen when that is
 * non-zero and smaller.  Stops at the first byte the proc rejects.
 * Characters are stored only when maxCharLen is non-zero.  *usedCharLen and
 * the return value are both the number of bytes processed.
 *
 * NOTE(review): the declaration of `character` and the loop increment are
 * missing from this listing. */
253 static UInt32
__CFToUnicodeCheapEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
254 UInt32 processedByteLen
= 0;
255 UInt32 length
= (maxCharLen
&& (maxCharLen
< numBytes
) ? maxCharLen
: numBytes
);
258 while (processedByteLen
< length
) {
259 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
261 if (maxCharLen
) characters
[processedByteLen
] = character
;
265 *usedCharLen
= processedByteLen
;
266 return processedByteLen
;
/* Like __CFToUnicodeCheapEightBitWrapper, but emits the canonical
 * decomposition of each decoded character.
 *
 * Each byte is decoded through converter->_toUnicode.  If the character is
 * decomposable (HFS+ rules when kCFStringEncodingUseHFSPlusCanonical is set in
 * flags), it is decomposed into charBuffer and every resulting code point is
 * appended to `characters`; code points above 0xFFFF are written as a
 * surrogate pair.  Non-decomposable characters are appended as-is when
 * maxCharLen is non-zero.  Returns the number of bytes consumed and reports
 * the number of UTF-16 units through *usedCharLen.
 *
 * When a decomposition does not fit, the function returns early; *usedCharLen
 * was set just before the inner loop so that it excludes the partial output.
 *
 * NOTE(review): the counter updates and several declarations are missing from
 * this listing. */
269 static UInt32
__CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
270 UInt32 processedByteLen
= 0;
271 UInt32 theUsedCharLen
= 0;
272 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
275 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
277 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
278 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
280 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
283 usedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Snapshot the count before the decomposition so an early return reports only whole characters.
284 *usedCharLen
= theUsedCharLen
;
286 for (idx
= 0;idx
< usedLen
;idx
++) {
287 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// NOTE(review): as far as the visible lines show, this capacity check is not guarded by maxCharLen != 0, unlike the loop condition above; with maxCharLen == 0 it would always return - confirm against the full source.
288 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
291 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
292 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
293 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
296 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
298 *(characters
++) = charBuffer
[idx
];
302 if (maxCharLen
) *(characters
++) = character
;
308 *usedCharLen
= theUsedCharLen
;
309 return processedByteLen
;
/* Adapts a proc that turns a run of characters into a single byte (stored in
 * converter->_toBytes) to the buffer-based to-bytes interface.
 *
 * On each iteration the proc is given the remaining characters and returns
 * how many it consumed for one output byte; 0 stops the loop.  The byte is
 * stored at bytes[*usedByteLen] only when maxByteLen is non-zero.  Returns the
 * total number of characters consumed.
 *
 * NOTE(review): the initialisation and increment of *usedByteLen, the
 * decrement of numChars and the declarations of `byte` and `usedLen` are
 * missing from this listing. */
312 static UInt32
__CFToBytesStandardEightBitWrapper(const void *converter
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
313 UInt32 processedCharLen
= 0;
319 while (numChars
&& (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
320 if (!(usedLen
= ((CFStringEncodingStandardEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
, numChars
, &byte
))) break;
322 if (maxByteLen
) bytes
[*usedByteLen
] = byte
;
324 characters
+= usedLen
;
326 processedCharLen
+= usedLen
;
329 return processedCharLen
;
/* Adapts a proc that turns one byte into one or more characters (stored in
 * converter->_toUnicode) to the buffer-based to-Unicode interface.
 *
 * Each byte is decoded into charBuffer; the proc's return value is the number
 * of characters produced, 0 meaning failure.  The characters are copied to
 * characters[*usedCharLen...] if they fit within maxCharLen, and *usedCharLen
 * is advanced.  Returns the number of bytes consumed.
 *
 * charBuffer is a fixed 20 elements on __MACOS8__ / __WIN32__ and otherwise a
 * variable-length array sized by converter->maxLen.
 *
 * NOTE(review): the #else / #endif lines, some declarations and the
 * processedByteLen increment are missing from this listing, and the exact
 * nesting of the capacity check cannot be confirmed. */
332 static UInt32
__CFToUnicodeStandardEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
333 UInt32 processedByteLen
= 0;
334 #if defined(__MACOS8__) || defined(__WIN32__)
335 UniChar charBuffer
[20]; // Dynamic stack allocation is GNU specific
337 UniChar charBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
343 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
// NOTE(review): the proc is cast to the CheapEightBit type although it is handed a buffer and its result is used as a length; CFStringEncodingStandardEightBitToUnicodeProc was presumably intended - confirm against the header.
344 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
349 if (*usedCharLen
+ usedLen
> maxCharLen
) break;
351 for (idx
= 0;idx
< usedLen
;idx
++) {
352 characters
[*usedCharLen
+ idx
] = charBuffer
[idx
];
355 *usedCharLen
+= usedLen
;
359 return processedByteLen
;
/* Canonical-decomposition variant of __CFToUnicodeStandardEightBitWrapper.
 *
 * Each byte is decoded into charBuffer (one or more characters).  Every
 * decoded character that is decomposable (HFS+ rules when
 * kCFStringEncodingUseHFSPlusCanonical is set) is decomposed into
 * decompBuffer and appended to `characters`; others are appended unchanged
 * when maxCharLen is non-zero.  Returns the number of bytes consumed and
 * reports UTF-16 units written through *usedCharLen.
 *
 * NOTE(review): the #else / #endif lines, counter updates and some
 * declarations are missing from this listing. */
362 static UInt32
__CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
363 UInt32 processedByteLen
= 0;
364 #if defined(__MACOS8__) || defined(__WIN32__)
365 UniChar charBuffer
[20]; // Dynamic stack allocation is GNU specific
367 UniChar charBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
369 UTF32Char decompBuffer
[MAX_DECOMPOSED_LENGTH
];
372 UInt32 idx
, decompIndex
;
373 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
374 UInt32 theUsedCharLen
= 0;
376 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// NOTE(review): cast to the CheapEightBit proc type although a buffer is passed and the result is used as a length - confirm against the header.
377 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
379 for (idx
= 0;idx
< usedLen
;idx
++) {
380 if (CFUniCharIsDecomposableCharacter(charBuffer
[idx
], isHFSPlus
)) {
381 decompedLen
= CFUniCharDecomposeCharacter(charBuffer
[idx
], decompBuffer
, MAX_DECOMPOSED_LENGTH
);
// Snapshot the count so an early return below reports only fully emitted characters.
382 *usedCharLen
= theUsedCharLen
;
384 for (decompIndex
= 0;decompIndex
< decompedLen
;decompIndex
++) {
// NOTE(review): this loop tests decompBuffer[decompIndex] but every store below reads charBuffer[idx], i.e. the undecomposed character is emitted once per decomposed code point; decompBuffer[decompIndex] looks intended (compare the CheapEightBit and CheapMultiByte variants) - confirm and fix in the full source.
385 if (decompBuffer
[decompIndex
] > 0xFFFF) { // Non-BMP
386 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
389 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
390 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
391 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
394 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
396 *(characters
++) = charBuffer
[idx
];
400 if (maxCharLen
) *(characters
++) = charBuffer
[idx
];
407 *usedCharLen
= theUsedCharLen
;
408 return processedByteLen
;
/* Adapts a proc that turns one character into one or more bytes (stored in
 * converter->_toBytes) to the buffer-based to-bytes interface.
 *
 * Each character is encoded into byteBuffer; the proc's return value is the
 * byte count, 0 meaning failure.  The bytes are copied to
 * bytes[*usedByteLen...] if they fit within maxByteLen, and *usedByteLen is
 * advanced.  Returns the number of characters consumed.
 *
 * byteBuffer is a fixed 20 bytes on __MACOS8__ / __WIN32__ and otherwise a
 * variable-length array sized by converter->maxLen.
 *
 * NOTE(review): the #else / #endif lines, some declarations and the
 * processedCharLen increment are missing from this listing, and the exact
 * nesting of the capacity check cannot be confirmed. */
411 static UInt32
__CFToBytesCheapMultiByteWrapper(const void *converter
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
412 UInt32 processedCharLen
= 0;
413 #if defined(__MACOS8__) || defined(__WIN32__)
414 uint8_t byteBuffer
[20]; // Dynamic stack allocation is GNU specific
416 uint8_t byteBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
422 while ((processedCharLen
< numChars
) && (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
423 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
[processedCharLen
], byteBuffer
))) break;
428 if (*usedByteLen
+ usedLen
> maxByteLen
) break;
430 for (idx
= 0;idx
<usedLen
;idx
++) {
431 bytes
[*usedByteLen
+ idx
] = byteBuffer
[idx
];
435 *usedByteLen
+= usedLen
;
439 return processedCharLen
;
/* Adapts a proc that turns a run of bytes into a single character (stored in
 * converter->_toUnicode) to the buffer-based to-Unicode interface.
 *
 * On each iteration the proc is given the remaining bytes and returns how
 * many it consumed for one character; 0 stops the loop.  The character is
 * appended only when maxCharLen is non-zero.  Returns the total number of
 * bytes consumed.
 *
 * NOTE(review): the updates of *usedCharLen, `bytes` and numBytes and the
 * declarations of `character` and `usedLen` are missing from this listing. */
442 static UInt32
__CFToUnicodeCheapMultiByteWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
443 UInt32 processedByteLen
= 0;
449 while (numBytes
&& (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
450 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
452 if (maxCharLen
) *(characters
++) = character
;
454 processedByteLen
+= usedLen
;
459 return processedByteLen
;
/* Canonical-decomposition variant of __CFToUnicodeCheapMultiByteWrapper.
 *
 * Each run of bytes is decoded into one character through
 * converter->_toUnicode.  A decomposable character (HFS+ rules when
 * kCFStringEncodingUseHFSPlusCanonical is set) is decomposed into charBuffer
 * and each code point appended to `characters`, using a surrogate pair above
 * 0xFFFF; other characters are appended unchanged when maxCharLen is
 * non-zero.  Returns the number of bytes consumed and reports UTF-16 units
 * through *usedCharLen.
 *
 * NOTE(review): counter updates, the advance of `bytes` / numBytes and some
 * declarations are missing from this listing. */
462 static UInt32
__CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
463 UInt32 processedByteLen
= 0;
464 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
467 UInt32 decomposedLen
;
468 UInt32 theUsedCharLen
= 0;
469 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
471 while (numBytes
&& (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
472 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
474 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
477 decomposedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Snapshot the count so an early return below reports only fully emitted characters.
478 *usedCharLen
= theUsedCharLen
;
480 for (idx
= 0;idx
< decomposedLen
;idx
++) {
481 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// NOTE(review): as far as the visible lines show, this capacity check is not guarded by maxCharLen != 0, unlike the loop condition above - confirm against the full source.
482 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
485 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
486 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
487 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
490 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
492 *(characters
++) = charBuffer
[idx
];
496 if (maxCharLen
) *(characters
++) = character
;
500 processedByteLen
+= usedLen
;
504 *usedCharLen
= theUsedCharLen
;
505 return processedByteLen
;
/* Built-in registry entry for kCFStringEncodingASCII: display name
 * "Western (ASCII)", charset aliases, and kCFStringEncodingMacRoman as the
 * last initializer.  The second initializer and the four after the alias
 * list start out NULL.
 * NOTE(review): which struct field each positional initializer maps to
 * depends on the _CFConverterEntry declaration, which is not visible here. */
510 static _CFConverterEntry __CFConverterEntryASCII
= {
511 kCFStringEncodingASCII
, NULL
,
512 "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL
}, NULL
, NULL
, NULL
, NULL
,
513 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for kCFStringEncodingISOLatin1: display name
 * "Western (ISO Latin 1)", charset aliases, and kCFStringEncodingMacRoman as
 * the last initializer.  The second initializer and the four after the alias
 * list start out NULL. */
516 static _CFConverterEntry __CFConverterEntryISOLatin1
= {
517 kCFStringEncodingISOLatin1
, NULL
,
518 "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL
}, NULL
, NULL
, NULL
, NULL
,
519 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for kCFStringEncodingMacRoman: display name
 * "Western (Mac OS Roman)", charset aliases, and kCFStringEncodingMacRoman as
 * the last initializer.  The second initializer and the four after the alias
 * list start out NULL. */
522 static _CFConverterEntry __CFConverterEntryMacRoman
= {
523 kCFStringEncodingMacRoman
, NULL
,
524 "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL
}, NULL
, NULL
, NULL
, NULL
,
525 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for kCFStringEncodingWindowsLatin1: display name
 * "Western (Windows Latin 1)", charset aliases, and kCFStringEncodingMacRoman
 * as the last initializer.  The second initializer and the four after the
 * alias list start out NULL. */
528 static _CFConverterEntry __CFConverterEntryWinLatin1
= {
529 kCFStringEncodingWindowsLatin1
, NULL
,
530 "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL
}, NULL
, NULL
, NULL
, NULL
,
531 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for kCFStringEncodingNextStepLatin: display name
 * "Western (NextStep)", a single charset alias padded with NULLs, and
 * kCFStringEncodingMacRoman as the last initializer.  The second initializer
 * and the four after the alias list start out NULL. */
534 static _CFConverterEntry __CFConverterEntryNextStepLatin
= {
535 kCFStringEncodingNextStepLatin
, NULL
,
536 "Western (NextStep)", {"x-nextstep", NULL
, NULL
, NULL
}, NULL
, NULL
, NULL
, NULL
,
537 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for kCFStringEncodingUTF8: display name "UTF-8",
 * two charset aliases padded with NULLs, and kCFStringEncodingUnicode (not
 * MacRoman, unlike the other built-in entries) as the last initializer.  The
 * second initializer and the four after the alias list start out NULL. */
540 static _CFConverterEntry __CFConverterEntryUTF8
= {
541 kCFStringEncodingUTF8
, NULL
,
542 "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL
, NULL
}, NULL
, NULL
, NULL
, NULL
,
543 kCFStringEncodingUnicode
// We use string encoding's script range here
/* Maps an encoding identifier to its static built-in registry entry.
 * kCFStringEncodingInvalidId shares the ASCII case and therefore resolves to
 * the ASCII entry.  In the visible cases, any other encoding returns NULL.
 * NOTE(review): the `switch` line itself and any cases between the visible
 * ones are missing from this listing. */
546 CF_INLINE _CFConverterEntry
*__CFStringEncodingConverterGetEntry(UInt32 encoding
) {
548 case kCFStringEncodingInvalidId
:
549 case kCFStringEncodingASCII
:
550 return &__CFConverterEntryASCII
;
552 case kCFStringEncodingISOLatin1
:
553 return &__CFConverterEntryISOLatin1
;
555 case kCFStringEncodingMacRoman
:
556 return &__CFConverterEntryMacRoman
;
558 case kCFStringEncodingWindowsLatin1
:
559 return &__CFConverterEntryWinLatin1
;
561 case kCFStringEncodingNextStepLatin
:
562 return &__CFConverterEntryNextStepLatin
;
564 case kCFStringEncodingUTF8
:
565 return &__CFConverterEntryUTF8
;
567 default: return NULL
;
/* Builds the internal _CFEncodingConverter for a public converter definition.
 *
 * Converters are carved out of chunks of NUM_OF_ENTRIES_CYCLE entries obtained
 * from CFAllocatorAllocate; the chunk bookkeeping (_currentIndex,
 * _allocatedSize, _allocatedEntries) is static and is updated while holding
 * _indexLock.  The converter's fields are filled in after the lock is
 * released.
 *
 * Per encoding class:
 *   - Standard: the definition's procs are used directly (toCanonicalUnicode
 *     is set to the same proc as toUnicode), _toBytes/_toUnicode are NULL,
 *     maxLen is 2.
 *   - CheapEightBit / StandardEightBit / CheapMultiByte: toBytes, toUnicode
 *     and toCanonicalUnicode point at the matching wrapper functions and the
 *     definition's procs are kept in _toBytes/_toUnicode; maxLen is 1,
 *     maxDecomposedCharLen and maxBytesPerChar respectively.
 *   - PlatformSpecific: every proc is NULL and maxLen is 0.
 *
 * The visible tail then fills in the length procs, the fallback procs (using
 * the file's default fallbacks when the definition supplies none), the
 * precompose proc and the combining-character test.
 *
 * NOTE(review): `break` / `return` lines and closing braces are missing from
 * this listing, so which classes reach the common tail cannot be confirmed. */
571 CF_INLINE _CFEncodingConverter
*__CFEncodingConverterFromDefinition(const CFStringEncodingConverter
*definition
) {
572 #define NUM_OF_ENTRIES_CYCLE (10)
573 static CFSpinLock_t _indexLock
= 0;
574 static UInt32 _currentIndex
= 0;
575 static UInt32 _allocatedSize
= 0;
576 static _CFEncodingConverter
*_allocatedEntries
= NULL
;
577 _CFEncodingConverter
*converter
;
580 __CFSpinLock(&_indexLock
);
// The current chunk is about to run out: forget it so that a new one is allocated below.
581 if ((_currentIndex
+ 1) >= _allocatedSize
) {
584 _allocatedEntries
= NULL
;
586 if (_allocatedEntries
== NULL
) { // Not allocated yet
// NOTE(review): no NULL check on the allocation result is visible in this listing.
587 _allocatedEntries
= (_CFEncodingConverter
*)CFAllocatorAllocate(NULL
, sizeof(_CFEncodingConverter
) * NUM_OF_ENTRIES_CYCLE
, 0);
588 _allocatedSize
= NUM_OF_ENTRIES_CYCLE
;
589 converter
= &(_allocatedEntries
[_currentIndex
]);
591 converter
= &(_allocatedEntries
[++_currentIndex
]);
593 __CFSpinUnlock(&_indexLock
);
595 switch (definition
->encodingClass
) {
596 case kCFStringEncodingConverterStandard
:
597 converter
->toBytes
= definition
->toBytes
;
598 converter
->toUnicode
= definition
->toUnicode
;
599 converter
->toCanonicalUnicode
= definition
->toUnicode
;
// NULL _toBytes/_toUnicode make TO_BYTE / TO_UNICODE call the procs directly, without a converter argument.
600 converter
->_toBytes
= NULL
;
601 converter
->_toUnicode
= NULL
;
602 converter
->maxLen
= 2;
605 case kCFStringEncodingConverterCheapEightBit
:
606 converter
->toBytes
= __CFToBytesCheapEightBitWrapper
;
607 converter
->toUnicode
= __CFToUnicodeCheapEightBitWrapper
;
608 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapEightBitWrapper
;
609 converter
->_toBytes
= definition
->toBytes
;
610 converter
->_toUnicode
= definition
->toUnicode
;
611 converter
->maxLen
= 1;
614 case kCFStringEncodingConverterStandardEightBit
:
615 converter
->toBytes
= __CFToBytesStandardEightBitWrapper
;
616 converter
->toUnicode
= __CFToUnicodeStandardEightBitWrapper
;
617 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeStandardEightBitWrapper
;
618 converter
->_toBytes
= definition
->toBytes
;
619 converter
->_toUnicode
= definition
->toUnicode
;
620 converter
->maxLen
= definition
->maxDecomposedCharLen
;
623 case kCFStringEncodingConverterCheapMultiByte
:
624 converter
->toBytes
= __CFToBytesCheapMultiByteWrapper
;
625 converter
->toUnicode
= __CFToUnicodeCheapMultiByteWrapper
;
626 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapMultiByteWrapper
;
627 converter
->_toBytes
= definition
->toBytes
;
628 converter
->_toUnicode
= definition
->toUnicode
;
629 converter
->maxLen
= definition
->maxBytesPerChar
;
632 case kCFStringEncodingConverterPlatformSpecific
:
633 converter
->toBytes
= NULL
;
634 converter
->toUnicode
= NULL
;
635 converter
->toCanonicalUnicode
= NULL
;
636 converter
->_toBytes
= NULL
;
637 converter
->_toUnicode
= NULL
;
638 converter
->maxLen
= 0;
639 converter
->toBytesLen
= NULL
;
640 converter
->toUnicodeLen
= NULL
;
641 converter
->toBytesFallback
= NULL
;
642 converter
->toUnicodeFallback
= NULL
;
643 converter
->toBytesPrecompose
= NULL
;
644 converter
->isValidCombiningChar
= NULL
;
647 default: // Shouldn't be here
// When the definition has no length proc, the per-unit maximum is stored in the proc field as a small integer; the length query functions in this file test the stored value against 0xFFFF before calling it.
651 converter
->toBytesLen
= (definition
->toBytesLen
? definition
->toBytesLen
: (CFStringEncodingToBytesLenProc
)(UInt32
)definition
->maxBytesPerChar
);
652 converter
->toUnicodeLen
= (definition
->toUnicodeLen
? definition
->toUnicodeLen
: (CFStringEncodingToUnicodeLenProc
)(UInt32
)definition
->maxDecomposedCharLen
);
653 converter
->toBytesFallback
= (definition
->toBytesFallback
? definition
->toBytesFallback
: __CFDefaultToBytesFallbackProc
);
654 converter
->toUnicodeFallback
= (definition
->toUnicodeFallback
? definition
->toUnicodeFallback
: __CFDefaultToUnicodeFallbackProc
);
655 converter
->toBytesPrecompose
= (definition
->toBytesPrecompose
? definition
->toBytesPrecompose
: NULL
);
656 converter
->isValidCombiningChar
= (definition
->isValidCombiningChar
? definition
->isValidCombiningChar
: NULL
);
/* Returns the built-in converter definition that matches entry->encoding, or
 * NULL when `entry` is NULL.
 * NOTE(review): the default case and the end of the function are missing from
 * this listing, so the result for other encodings is not shown. */
661 CF_INLINE
const CFStringEncodingConverter
*__CFStringEncodingConverterGetDefinition(_CFConverterEntry
*entry
) {
662 if (!entry
) return NULL
;
664 switch (entry
->encoding
) {
665 case kCFStringEncodingASCII
:
666 return &__CFConverterASCII
;
668 case kCFStringEncodingISOLatin1
:
669 return &__CFConverterISOLatin1
;
671 case kCFStringEncodingMacRoman
:
672 return &__CFConverterMacRoman
;
674 case kCFStringEncodingWindowsLatin1
:
675 return &__CFConverterWinLatin1
;
677 case kCFStringEncodingNextStepLatin
:
678 return &__CFConverterNextStepLatin
;
680 case kCFStringEncodingUTF8
:
681 return &__CFConverterUTF8
;
/* Returns the internal converter for `encoding`, creating it on first use.
 *
 * Returns NULL when the encoding has no registry entry.  When the entry has no
 * converter yet, the definition is looked up, a converter is built from it and
 * cached in entry->converter, and the definition's own fallback procs are
 * recorded on the entry.
 *
 * NOTE(review): the lines between the definition lookup and its first use are
 * missing from this listing, so handling of a NULL definition cannot be
 * confirmed; no locking around the lazy initialisation is visible either. */
688 static const _CFEncodingConverter
*__CFGetConverter(UInt32 encoding
) {
689 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
691 if (!entry
) return NULL
;
693 if (!entry
->converter
) {
694 const CFStringEncodingConverter
*definition
= __CFStringEncodingConverterGetDefinition(entry
);
697 entry
->converter
= __CFEncodingConverterFromDefinition(definition
);
698 entry
->toBytesFallback
= definition
->toBytesFallback
;
699 entry
->toUnicodeFallback
= definition
->toUnicodeFallback
;
703 return (_CFEncodingConverter
*)entry
->converter
;
/* Converts UTF-16 `characters` to `bytes` in the given encoding.
 *
 * usedCharLen / usedByteLen are optional outputs (each is written only when
 * non-NULL) receiving the number of characters consumed and bytes produced.
 * A maxByteLen of 0 is treated throughout as "do not store output".
 *
 * UTF-8 is handled up front: with either canonical flag the input is
 * decomposed straight to UTF-8 by CFUniCharDecompose; otherwise the UTF-8
 * converter's toBytes proc, cached in a function-local static, is called.
 * The result is success when every character was consumed,
 * kCFStringEncodingInsufficientOutputBufferLength when a non-zero maxByteLen
 * was exactly filled, and kCFStringEncodingInvalidInputStream otherwise.
 *
 * For other encodings the converter is fetched with __CFGetConverter
 * (kCFStringEncodingConverterUnavailable when there is none) and TO_BYTE is
 * called repeatedly.  When it stops short of numChars, the visible branches
 * either precompose or substitute combining sequences, report a full buffer,
 * skip combining characters, emit a lossy byte or call the converter's
 * fallback, or flag the input as invalid, depending on `flags`.
 *
 * NOTE(review): many `else` lines, closing braces, declarations (usedLen,
 * dummy, localUsedLen) and the final return are missing from this listing;
 * the inline notes describe only what the visible lines establish. */
708 UInt32
CFStringEncodingUnicodeToBytes(UInt32 encoding
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
, UInt32
*usedCharLen
, uint8_t *bytes
, UInt32 maxByteLen
, UInt32
*usedByteLen
) {
709 if (encoding
== kCFStringEncodingUTF8
) {
710 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
711 uint32_t convertedCharLen
;
715 if ((flags
& kCFStringEncodingUseCanonical
) || (flags
& kCFStringEncodingUseHFSPlusCanonical
)) {
716 (void)CFUniCharDecompose(characters
, numChars
, &convertedCharLen
, (void *)bytes
, maxByteLen
, &usedLen
, true, kCFUniCharUTF8Format
, (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false));
// NOTE(review): the guard around this lazy initialisation is not visible; no synchronisation is visible either.
719 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
720 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
722 convertedCharLen
= __CFToUTF8(0, characters
, numChars
, bytes
, maxByteLen
, (UInt32
*)&usedLen
);
724 if (usedCharLen
) *usedCharLen
= convertedCharLen
;
725 if (usedByteLen
) *usedByteLen
= usedLen
;
727 if (convertedCharLen
== numChars
) {
728 return kCFStringEncodingConversionSuccess
;
729 } else if (maxByteLen
&& (maxByteLen
== usedLen
)) {
730 return kCFStringEncodingInsufficientOutputBufferLength
;
732 return kCFStringEncodingInvalidInputStream
;
735 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
737 UInt32 localUsedByteLen
;
738 UInt32 theUsedByteLen
= 0;
739 UInt32 theResult
= kCFStringEncodingConversionSuccess
;
740 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose
= NULL
;
741 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar
= NULL
;
743 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// Decide from the flags whether combining characters get special treatment and whether precomposition is attempted.
745 if (flags
& kCFStringEncodingSubstituteCombinings
) {
746 if (!(flags
& kCFStringEncodingAllowLossyConversion
)) isValidCombiningChar
= converter
->isValidCombiningChar
;
748 isValidCombiningChar
= converter
->isValidCombiningChar
;
749 if (!(flags
& kCFStringEncodingIgnoreCombinings
)) {
750 toBytesPrecompose
= converter
->toBytesPrecompose
;
751 flags
|= kCFStringEncodingComposeCombinings
;
756 while ((usedLen
< numChars
) && (!maxByteLen
|| (theUsedByteLen
< maxByteLen
))) {
757 if ((usedLen
+= TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) < numChars
) {
760 if (isValidCombiningChar
&& (usedLen
> 0) && isValidCombiningChar(characters
[usedLen
])) {
761 if (toBytesPrecompose
) {
762 UInt32 localUsedLen
= usedLen
;
// Walk back to the base character of the combining sequence.
// NOTE(review): no lower bound on usedLen is visible in this scan; if every preceding character is combining it would run past the start of the buffer - confirm.
764 while (isValidCombiningChar(characters
[--usedLen
]));
765 theUsedByteLen
+= localUsedByteLen
;
// For multi-byte encodings, measure the bytes already emitted for the base character (NULL output, size 0) and take them back.
766 if (converter
->maxLen
> 1) {
767 TO_BYTE(converter
, flags
, characters
+ usedLen
, localUsedLen
- usedLen
, NULL
, 0, &localUsedByteLen
);
768 theUsedByteLen
-= localUsedByteLen
;
772 if ((localUsedLen
= toBytesPrecompose(flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) > 0) {
773 usedLen
+= localUsedLen
;
774 if ((usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
])) { // There is a non-base char not combined remaining
775 theUsedByteLen
+= localUsedByteLen
;
776 theResult
= kCFStringEncodingInvalidInputStream
;
779 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
780 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
// NOTE(review): unlike the kCFStringEncodingIgnoreCombinings scan further down, this scan does not compare usedLen with numChars and can read past the end of `characters` when the input ends in combining characters - confirm.
783 while (isValidCombiningChar(characters
[++usedLen
]));
784 localUsedByteLen
= 1;
785 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
788 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
791 theResult
= kCFStringEncodingInvalidInputStream
;
// The buffer counts as full when the bytes fit exactly, or when a dry run (NULL output) shows the next character is convertible.
794 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
795 theUsedByteLen
+= localUsedByteLen
;
796 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
798 } else if (flags
& kCFStringEncodingIgnoreCombinings
) {
799 while ((++usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
]));
801 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
803 theUsedByteLen
+= localUsedByteLen
;
806 localUsedByteLen
= 1;
807 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
809 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
812 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
813 theUsedByteLen
+= localUsedByteLen
;
815 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
// Consume trailing characters whose fallback produces no bytes, so that they do not cause a buffer-full result.
818 localUsedByteLen
= 0;
819 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
821 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
823 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
824 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
826 theUsedByteLen
+= localUsedByteLen
;
829 localUsedByteLen
= 1;
830 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
832 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
835 theUsedByteLen
+= localUsedByteLen
;
836 theResult
= kCFStringEncodingInvalidInputStream
;
840 theUsedByteLen
+= localUsedByteLen
;
// The loop ended with input left over and no error recorded: the output buffer ran out.
843 if (usedLen
< numChars
&& maxByteLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
844 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
847 localUsedByteLen
= 0;
848 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
850 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
852 if (usedByteLen
) *usedByteLen
= theUsedByteLen
;
853 if (usedCharLen
) *usedCharLen
= usedLen
;
/* Converts `bytes` in the given encoding to UTF-16 `characters`.
 *
 * usedByteLen / usedCharLen are optional outputs (each is written only when
 * non-NULL) receiving the number of bytes consumed and UTF-16 units produced.
 * A maxCharLen of 0 is treated as "do not store output".
 *
 * Returns kCFStringEncodingConverterUnavailable when no converter exists.
 * Otherwise TO_UNICODE is called repeatedly; when it stops short of numBytes:
 *   - the buffer counts as full when the output exactly fills a non-zero
 *     maxCharLen, or, with a canonical flag set, when a dry run (NULL output)
 *     shows the next bytes are convertible;
 *   - with kCFStringEncodingAllowLossyConversion the converter's to-Unicode
 *     fallback is invoked;
 *   - otherwise the result becomes kCFStringEncodingInvalidInputStream.
 * If input remains after the loop without an error being recorded, the result
 * is kCFStringEncodingInsufficientOutputBufferLength.
 *
 * NOTE(review): the declaration of usedLen, some `else` / `break` lines and
 * the final return are missing from this listing. */
859 UInt32
CFStringEncodingBytesToUnicode(UInt32 encoding
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
, UInt32
*usedByteLen
, UniChar
*characters
, UInt32 maxCharLen
, UInt32
*usedCharLen
) {
860 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
862 UInt32 theUsedCharLen
= 0;
863 UInt32 localUsedCharLen
;
864 UInt32 theResult
= kCFStringEncodingConversionSuccess
;
866 if (!converter
) return kCFStringEncodingConverterUnavailable
;
869 while ((usedLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
870 if ((usedLen
+= TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
)) < numBytes
) {
871 UInt32 tempUsedCharLen
;
873 if (maxCharLen
&& ((maxCharLen
== theUsedCharLen
+ localUsedCharLen
) || ((flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
)) && TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, NULL
, 0, &tempUsedCharLen
)))) { // buffer was filled up
874 theUsedCharLen
+= localUsedCharLen
;
875 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
877 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
878 theUsedCharLen
+= localUsedCharLen
;
879 usedLen
+= TO_UNICODE_FALLBACK(converter
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
);
881 theUsedCharLen
+= localUsedCharLen
;
882 theResult
= kCFStringEncodingInvalidInputStream
;
886 theUsedCharLen
+= localUsedCharLen
;
889 if (usedLen
< numBytes
&& maxCharLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
890 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
892 if (usedCharLen
) *usedCharLen
= theUsedCharLen
;
893 if (usedByteLen
) *usedByteLen
= usedLen
;
/* Returns true when CFStringEncodingGetConverter yields a converter for
 * `encoding`, false otherwise. */
898 __private_extern__ Boolean
CFStringEncodingIsValidEncoding(UInt32 encoding
) {
899 return (CFStringEncodingGetConverter(encoding
) ? true : false);
/* Returns the display name stored in the encoding's built-in registry entry.
 * NOTE(review): the path taken when there is no entry is missing from this
 * listing. */
902 __private_extern__
const char *CFStringEncodingName(UInt32 encoding
) {
903 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
904 if (entry
) return entry
->encodingName
;
908 __private_extern__
const char **CFStringEncodingCanonicalCharsetNames(UInt32 encoding
) {
909 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
910 if (entry
) return entry
->ianaNames
;
914 __private_extern__ UInt32
CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding
) {
915 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
917 return (entry
? entry
->scriptCode
: (encoding
== kCFStringEncodingUnicode
? kCFStringEncodingUnicode
: (encoding
< 0xFF ? encoding
: kCFStringEncodingInvalidId
)));
920 __private_extern__ UInt32
CFStringEncodingCharLengthForBytes(UInt32 encoding
, UInt32 flags
, const uint8_t *bytes
, UInt32 numBytes
) {
921 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
924 UInt32 switchVal
= (UInt32
)(converter
->toUnicodeLen
);
926 if (switchVal
< 0xFFFF)
927 return switchVal
* numBytes
;
929 return converter
->toUnicodeLen(flags
, bytes
, numBytes
);
935 __private_extern__ UInt32
CFStringEncodingByteLengthForCharacters(UInt32 encoding
, UInt32 flags
, const UniChar
*characters
, UInt32 numChars
) {
936 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
939 UInt32 switchVal
= (UInt32
)(converter
->toBytesLen
);
941 if (switchVal
< 0xFFFF)
942 return switchVal
* numChars
;
944 return converter
->toBytesLen(flags
, characters
, numChars
);
950 __private_extern__
void CFStringEncodingRegisterFallbackProcedures(UInt32 encoding
, CFStringEncodingToBytesFallbackProc toBytes
, CFStringEncodingToUnicodeFallbackProc toUnicode
) {
951 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
953 if (entry
&& __CFGetConverter(encoding
)) {
954 ((_CFEncodingConverter
*)entry
->converter
)->toBytesFallback
= (toBytes
? toBytes
: entry
->toBytesFallback
);
955 ((_CFEncodingConverter
*)entry
->converter
)->toUnicodeFallback
= (toUnicode
? toUnicode
: entry
->toUnicodeFallback
);
959 __private_extern__
const CFStringEncodingConverter
*CFStringEncodingGetConverter(UInt32 encoding
) {
960 return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding
));
963 static const UInt32 __CFBuiltinEncodings
[] = {
964 kCFStringEncodingMacRoman
,
965 kCFStringEncodingWindowsLatin1
,
966 kCFStringEncodingISOLatin1
,
967 kCFStringEncodingNextStepLatin
,
968 kCFStringEncodingASCII
,
969 kCFStringEncodingUTF8
,
970 /* These two are available only in CFString-level */
971 kCFStringEncodingUnicode
,
972 kCFStringEncodingNonLossyASCII
,
973 kCFStringEncodingInvalidId
,
977 __private_extern__
const UInt32
*CFStringEncodingListOfAvailableEncodings(void) {
978 return __CFBuiltinEncodings
;