2 * Copyright (c) 2008 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
23 /* CFStringEncodingConverter.c
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFArray.h>
30 #include <CoreFoundation/CFDictionary.h>
31 #include "CFUniChar.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFStringEncodingConverterExt.h"
35 #include "CFStringEncodingConverterPriv.h"
37 #if !defined(__WIN32__)
/* TO_BYTE: when conv->_toBytes is set, call conv->toBytes with the converter
 * itself as the first argument; otherwise cast conv->toBytes to
 * CFStringEncodingToBytesProc and call it without a converter argument.
 * NOTE(review): macro arguments are not parenthesized; pass simple expressions. */
44 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->_toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->toBytes)(flags,chars,numChars,bytes,max,used))
/* TO_UNICODE: same dispatch keyed on conv->_toUnicode. When it is set, the
 * toCanonicalUnicode entry is used if either canonical flag is present in
 * flags, otherwise toUnicode. */
45 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->_toUnicode ? (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->toUnicode)(flags,bytes,numBytes,chars,max,used))
/* Byte emitted by the default to-bytes fallback for newline-class characters. */
47 #define ASCIINewLine 0x0a
/* UTF-16 surrogate ranges: high (leading) units, then low (trailing) units. */
48 #define kSurrogateHighStart 0xD800
49 #define kSurrogateHighEnd 0xDBFF
50 #define kSurrogateLowStart 0xDC00
51 #define kSurrogateLowEnd 0xDFFF
53 /* Mapping 160..255 (U+00A0..U+00FF) to lossy ASCII
/* One row per character U+00A0..U+00FF, in code point order (96 rows).
 * Each row holds up to four replacement ASCII bytes; a 0 byte pads the row,
 * and an all-zero row means the character has no replacement bytes.
 * Fix: CAPITAL THORN now yields "TH" and SMALL ETH yields "th", so the case of
 * the replacement follows the case of the source letter like every other row. */
56 unsigned char chars
[4];
57 } _toLossyASCIITable
[] = {
58 {{' ', 0, 0, 0}}, // NO-BREAK SPACE
59 {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
60 {{'c', 0, 0, 0}}, // CENT SIGN
61 {{'L', 0, 0, 0}}, // POUND SIGN
62 {{'$', 0, 0, 0}}, // CURRENCY SIGN
63 {{'Y', 0, 0, 0}}, // YEN SIGN
64 {{'|', 0, 0, 0}}, // BROKEN BAR
65 {{0, 0, 0, 0}}, // SECTION SIGN
66 {{0, 0, 0, 0}}, // DIAERESIS
67 {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
68 {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
69 {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
70 {{0, 0, 0, 0}}, // NOT SIGN
71 {{'-', 0, 0, 0}}, // SOFT HYPHEN
72 {{'(', 'R', ')', 0}}, // REGISTERED SIGN
73 {{0, 0, 0, 0}}, // MACRON
74 {{0, 0, 0, 0}}, // DEGREE SIGN
75 {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
76 {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
77 {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
78 {{0, 0, 0, 0}}, // ACUTE ACCENT
79 {{0, 0, 0, 0}}, // MICRO SIGN
80 {{0, 0, 0, 0}}, // PILCROW SIGN
81 {{0, 0, 0, 0}}, // MIDDLE DOT
82 {{0, 0, 0, 0}}, // CEDILLA
83 {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
84 {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
85 {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
86 {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
87 {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
88 {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
89 {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
90 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
91 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
92 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
93 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
94 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
95 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
96 {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
97 {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
98 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
99 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
100 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
101 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
102 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
103 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
104 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
105 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
106 {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
107 {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
108 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
109 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
110 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
111 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
112 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
113 {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
114 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
115 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
116 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
117 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
118 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
119 {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
120 {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
121 {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
122 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
123 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
124 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
125 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
126 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
127 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
128 {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
129 {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
130 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
131 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
132 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
133 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
134 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
135 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
136 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
137 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
138 {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
139 {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
140 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
141 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
142 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
143 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
144 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
145 {{'/', 0, 0, 0}}, // DIVISION SIGN
146 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
147 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
148 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
149 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
150 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
151 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
152 {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
153 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
/* Copies the lossy ASCII replacement for a Latin-1 supplement character into
 * `bytes`.
 *   character  - code unit to replace; the row index is (character - 0xA0),
 *                so callers must pass a value at or above 0xA0 that is inside
 *                the table.
 *   bytes      - destination; only written when maxByteLen is non-zero.
 *   maxByteLen - destination capacity; 0 means "do not write".
 * NOTE(review): the statements that stop at a 0 pad byte, count numBytes and
 * return are not visible in this excerpt. */
156 CF_INLINE CFIndex
__CFToASCIILatin1Fallback(UniChar character
, uint8_t *bytes
, CFIndex maxByteLen
) {
// Address of the 4-byte row for this character inside _toLossyASCIITable.
157 const uint8_t *losChars
= (const uint8_t*)_toLossyASCIITable
+ (character
- 0xA0) * sizeof(uint8_t[4]);
158 CFIndex numBytes
= 0;
// Scan at most 4 bytes, or maxByteLen when it is non-zero and smaller than 4.
159 CFIndex idx
, max
= (maxByteLen
&& (maxByteLen
< 4) ? maxByteLen
: 4);
161 for (idx
= 0;idx
< max
;idx
++) {
163 if (maxByteLen
) bytes
[idx
] = losChars
[idx
];
/* Default lossy fallback used when a converter cannot map a UTF-16 unit.
 *   characters  - input; *characters is the unit that failed to convert.
 *   numChars    - number of units available at `characters`.
 *   bytes       - destination; written only when maxByteLen is non-zero.
 *   maxByteLen  - destination capacity, 0 for "measure only".
 *   usedByteLen - receives the number of bytes produced.
 * Returns the number of UTF-16 units consumed (processCharLen).
 * Fix: a surrogate pair is consumed as two units only when the first unit is
 * a HIGH surrogate (<= kSurrogateHighEnd). The previous bound,
 * <= kSurrogateLowStart, also accepted the low surrogate 0xDC00 as a leading
 * unit, so two stray low surrogates were swallowed as one pair.
 * NOTE(review): the declaration of `byte` and several branch bodies are not
 * visible in this excerpt. */
173 static CFIndex
__CFDefaultToBytesFallbackProc(const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
174 CFIndex processCharLen
= 1, filledBytesLen
= 1;
177 if (*characters
< 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
178 byte
= (uint8_t)(*characters
- 0x80);
179 } else if (*characters
< 0x100) {
// 0xA0..0xFF: use the lossy Latin-1 table.
180 *usedByteLen
= __CFToASCIILatin1Fallback(*characters
, bytes
, maxByteLen
);
182 } else if (*characters
>= kSurrogateHighStart
&& *characters
<= kSurrogateLowEnd
) {
// Consume two units only for a well-formed high+low surrogate pair.
183 processCharLen
= (numChars
> 1 && *characters
<= kSurrogateHighEnd
&& *(characters
+ 1) >= kSurrogateLowStart
&& *(characters
+ 1) <= kSurrogateLowEnd
? 2 : 1);
184 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceCharacterSet
)) {
186 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceAndNewlineCharacterSet
)) {
188 } else if (*characters
== 0x2026) { // ellipsis
189 if (0 == maxByteLen
) {
191 } else if (maxByteLen
> 2) {
// Room for all three dots: expand the ellipsis to "...".
192 memset(bytes
, '.', 3);
194 return processCharLen
;
196 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharDecomposableCharacterSet
)) {
197 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
// Only the first decomposed code point (the base character) is used.
199 (void)CFUniCharDecomposeCharacter(*characters
, decomposed
, MAX_DECOMPOSED_LENGTH
);
200 if (*decomposed
< 0x80) {
201 byte
= (uint8_t)(*decomposed
);
203 UTF16Char theChar
= *decomposed
;
// Non-ASCII base character: run the fallback again on the base alone.
205 return __CFDefaultToBytesFallbackProc(&theChar
, 1, bytes
, maxByteLen
, usedByteLen
);
209 if (maxByteLen
) *bytes
= byte
;
210 *usedByteLen
= filledBytesLen
;
211 return processCharLen
;
/* Default fallback for bytes that cannot be decoded: stores '?' into
 * *characters when maxCharLen is non-zero.
 * NOTE(review): the statements that set *usedCharLen and return are not
 * visible in this excerpt. */
214 static CFIndex
__CFDefaultToUnicodeFallbackProc(const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
215 if (maxCharLen
) *characters
= (UniChar
)'?';
/* Call the converter's installed fallback procedures. Unlike TO_BYTE and
 * TO_UNICODE, neither the converter nor the flags are passed through. */
220 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
221 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
/* NOTE(review): EXTRA_BASE is not referenced anywhere in the visible part of the file. */
223 #define EXTRA_BASE (0x0F00)
225 /* Wrapper funcs for non-standard converters
/* Adapts a per-character "cheap eight bit" to-bytes proc (stored in
 * converter->_toBytes) to the buffer-based converter signature.
 * Processes at most min(maxByteLen, numChars) characters, or numChars when
 * maxByteLen is 0, and stops at the first character the proc rejects.
 * Bytes are stored only when maxByteLen is non-zero.
 * Sets *usedByteLen to, and returns, the number of characters processed
 * (one byte per character).
 * NOTE(review): the declaration of `byte` and the increment of
 * processedCharLen are not visible in this excerpt. */
227 static CFIndex
__CFToBytesCheapEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
228 CFIndex processedCharLen
= 0;
229 CFIndex length
= (maxByteLen
&& (maxByteLen
< numChars
) ? maxByteLen
: numChars
);
232 while (processedCharLen
< length
) {
233 if (!((CFStringEncodingCheapEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
[processedCharLen
], &byte
)) break;
235 if (maxByteLen
) bytes
[processedCharLen
] = byte
;
239 *usedByteLen
= processedCharLen
;
240 return processedCharLen
;
/* Adapts a per-byte "cheap eight bit" to-Unicode proc (stored in
 * converter->_toUnicode) to the buffer-based converter signature.
 * Processes at most min(maxCharLen, numBytes) bytes, or numBytes when
 * maxCharLen is 0, and stops at the first byte the proc rejects.
 * Characters are stored only when maxCharLen is non-zero.
 * Sets *usedCharLen to, and returns, the number of bytes processed
 * (one character per byte).
 * NOTE(review): the declaration of `character` and the increment of
 * processedByteLen are not visible in this excerpt. */
243 static CFIndex
__CFToUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
244 CFIndex processedByteLen
= 0;
245 CFIndex length
= (maxCharLen
&& (maxCharLen
< numBytes
) ? maxCharLen
: numBytes
);
248 while (processedByteLen
< length
) {
249 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
251 if (maxCharLen
) characters
[processedByteLen
] = character
;
255 *usedCharLen
= processedByteLen
;
256 return processedByteLen
;
/* Variant of the cheap eight bit to-Unicode wrapper that canonically
 * decomposes each decoded character. HFS+ decomposition rules are used when
 * kCFStringEncodingUseHFSPlusCanonical is set in flags.
 * Decomposed code points above 0xFFFF are written as a UTF-16 surrogate pair.
 * Returns the number of bytes processed and stores the number of UTF-16
 * units produced in *usedCharLen.
 * NOTE(review): inside the decomposition loop the capacity checks compare
 * against maxCharLen without first testing it for 0 - confirm how the
 * "measure only" mode (maxCharLen == 0) is meant to behave here.
 * NOTE(review): declarations of `character`, `usedLen`, `idx` and the
 * counter updates are not visible in this excerpt. */
259 static CFIndex
__CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
260 CFIndex processedByteLen
= 0;
261 CFIndex theUsedCharLen
= 0;
262 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
265 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
267 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
268 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
270 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
273 usedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count so far, so an early return below reports only
// characters that were written in full.
274 *usedCharLen
= theUsedCharLen
;
276 for (idx
= 0;idx
< usedLen
;idx
++) {
277 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
278 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Split into a high/low surrogate pair.
281 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
282 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
283 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
286 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
288 *(characters
++) = charBuffer
[idx
];
292 if (maxCharLen
) *(characters
++) = character
;
298 *usedCharLen
= theUsedCharLen
;
299 return processedByteLen
;
/* Adapts a "standard eight bit" to-bytes proc (converter->_toBytes) to the
 * buffer-based converter signature. Each call to the proc receives the
 * remaining characters, produces one byte and returns how many characters it
 * consumed; a return of 0 stops the loop.
 * Returns the total number of characters consumed; *usedByteLen is the
 * running byte count.
 * NOTE(review): the declarations of `byte` and `usedLen`, the initialization
 * and increment of *usedByteLen and the decrement of numChars are not
 * visible in this excerpt. */
302 static CFIndex
__CFToBytesStandardEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
303 CFIndex processedCharLen
= 0;
309 while (numChars
&& (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
310 if (!(usedLen
= ((CFStringEncodingStandardEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
, numChars
, &byte
))) break;
312 if (maxByteLen
) bytes
[*usedByteLen
] = byte
;
314 characters
+= usedLen
;
316 processedCharLen
+= usedLen
;
319 return processedCharLen
;
/* Adapts a "standard eight bit" to-Unicode proc (converter->_toUnicode) to
 * the buffer-based converter signature. Each input byte may expand to several
 * UTF-16 units, which are staged in charBuffer and then copied out.
 * Returns the number of bytes processed; *usedCharLen is the running count
 * of UTF-16 units produced.
 * NOTE(review): the proc is invoked through a cast to
 * CFStringEncodingCheapEightBitToUnicodeProc although its result is used as a
 * length - confirm that this typedef's return type can carry counts above 1.
 * NOTE(review): the two charBuffer declarations below look like alternatives
 * selected by a preprocessor conditional whose directive lines are not
 * visible in this excerpt. The declarations of `usedLen` and `idx`, the
 * initialization of *usedCharLen and the increment of processedByteLen are
 * not visible either. */
322 static CFIndex
__CFToUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
323 CFIndex processedByteLen
= 0;
325 UniChar charBuffer
[20]; // Dynamic stack allocation is GNU specific
327 UniChar charBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
333 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
334 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
// Stop before a partial write when the expansion does not fit.
339 if (*usedCharLen
+ usedLen
> maxCharLen
) break;
341 for (idx
= 0;idx
< usedLen
;idx
++) {
342 characters
[*usedCharLen
+ idx
] = charBuffer
[idx
];
345 *usedCharLen
+= usedLen
;
349 return processedByteLen
;
/* Variant of the standard eight bit to-Unicode wrapper that canonically
 * decomposes every decoded character. HFS+ rules are used when
 * kCFStringEncodingUseHFSPlusCanonical is set in flags.
 * Each input byte is decoded into charBuffer; every decomposable unit is then
 * expanded into decompBuffer, and the expanded code points are written to
 * `characters` (code points above 0xFFFF as a surrogate pair).
 * Returns the number of bytes processed and stores the number of UTF-16
 * units produced in *usedCharLen.
 * Fix: the decomposition loop now emits decompBuffer[decompIndex]. It used to
 * test decompBuffer[decompIndex] but write charBuffer[idx], so the original
 * composed character was emitted once per decomposed code point and the
 * decomposition itself was discarded.
 * NOTE(review): the proc is invoked through a cast to
 * CFStringEncodingCheapEightBitToUnicodeProc although its result is used as a
 * length - confirm the typedef's return type.
 * NOTE(review): the capacity checks in the decomposition loop compare against
 * maxCharLen without testing it for 0 - confirm the "measure only" behavior.
 * NOTE(review): the two charBuffer declarations look like alternatives under a
 * preprocessor conditional that is not visible; the declarations of `usedLen`
 * and `decompedLen` and the counter updates are not visible either. */
352 static CFIndex
__CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
353 CFIndex processedByteLen
= 0;
355 UniChar charBuffer
[20]; // Dynamic stack allocation is GNU specific
357 UniChar charBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
359 UTF32Char decompBuffer
[MAX_DECOMPOSED_LENGTH
];
362 CFIndex idx
, decompIndex
;
363 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
364 CFIndex theUsedCharLen
= 0;
366 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
367 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
369 for (idx
= 0;idx
< usedLen
;idx
++) {
370 if (CFUniCharIsDecomposableCharacter(charBuffer
[idx
], isHFSPlus
)) {
371 decompedLen
= CFUniCharDecomposeCharacter(charBuffer
[idx
], decompBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count so far, so an early return below reports only
// characters that were written in full.
372 *usedCharLen
= theUsedCharLen
;
374 for (decompIndex
= 0;decompIndex
< decompedLen
;decompIndex
++) {
375 if (decompBuffer
[decompIndex
] > 0xFFFF) { // Non-BMP
376 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Split the decomposed code point into a high/low surrogate pair.
379 decompBuffer
[decompIndex
] = decompBuffer
[decompIndex
] - 0x10000;
380 *(characters
++) = (decompBuffer
[decompIndex
] >> 10) + 0xD800UL
;
381 *(characters
++) = (decompBuffer
[decompIndex
] & 0x3FF) + 0xDC00UL
;
384 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
386 *(characters
++) = decompBuffer
[decompIndex
];
// Not decomposable: pass the decoded unit through unchanged.
390 if (maxCharLen
) *(characters
++) = charBuffer
[idx
];
397 *usedCharLen
= theUsedCharLen
;
398 return processedByteLen
;
/* Adapts a per-character "cheap multi byte" to-bytes proc
 * (converter->_toBytes) to the buffer-based converter signature. Each
 * character may produce several bytes, which are staged in byteBuffer and
 * then copied out.
 * Returns the number of characters processed; *usedByteLen is the running
 * byte count.
 * NOTE(review): the two byteBuffer declarations look like alternatives under
 * a preprocessor conditional that is not visible in this excerpt. The
 * declarations of `usedLen` and `idx`, the initialization of *usedByteLen and
 * the increment of processedCharLen are not visible either. */
401 static CFIndex
__CFToBytesCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
402 CFIndex processedCharLen
= 0;
404 uint8_t byteBuffer
[20]; // Dynamic stack allocation is GNU specific
406 uint8_t byteBuffer
[((const _CFEncodingConverter
*)converter
)->maxLen
];
412 while ((processedCharLen
< numChars
) && (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
413 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToBytesProc
)((const _CFEncodingConverter
*)converter
)->_toBytes
)(flags
, characters
[processedCharLen
], byteBuffer
))) break;
// Stop before a partial write when the byte sequence does not fit.
418 if (*usedByteLen
+ usedLen
> maxByteLen
) break;
420 for (idx
= 0;idx
<usedLen
;idx
++) {
421 bytes
[*usedByteLen
+ idx
] = byteBuffer
[idx
];
425 *usedByteLen
+= usedLen
;
429 return processedCharLen
;
/* Adapts a "cheap multi byte" to-Unicode proc (converter->_toUnicode) to the
 * buffer-based converter signature. Each call receives the remaining bytes,
 * produces one character and returns how many bytes it consumed; a return of
 * 0 stops the loop.
 * Returns the total number of bytes consumed; *usedCharLen is the running
 * character count.
 * NOTE(review): the declarations of `character` and `usedLen`, the
 * initialization and increment of *usedCharLen and the advance of
 * bytes/numBytes are not visible in this excerpt. */
432 static CFIndex
__CFToUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
433 CFIndex processedByteLen
= 0;
439 while (numBytes
&& (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
440 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
442 if (maxCharLen
) *(characters
++) = character
;
444 processedByteLen
+= usedLen
;
449 return processedByteLen
;
/* Variant of the cheap multi byte to-Unicode wrapper that canonically
 * decomposes each decoded character. HFS+ rules are used when
 * kCFStringEncodingUseHFSPlusCanonical is set in flags.
 * Decomposed code points above 0xFFFF are written as a UTF-16 surrogate pair.
 * Returns the number of bytes consumed and stores the number of UTF-16 units
 * produced in *usedCharLen.
 * NOTE(review): the capacity checks in the decomposition loop compare against
 * maxCharLen without testing it for 0 - confirm the "measure only" behavior.
 * NOTE(review): declarations of `character`, `usedLen`, `idx`, the counter
 * updates and the advance of bytes/numBytes are not visible in this excerpt. */
452 static CFIndex
__CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
453 CFIndex processedByteLen
= 0;
454 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
457 CFIndex decomposedLen
;
458 CFIndex theUsedCharLen
= 0;
459 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
461 while (numBytes
&& (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
462 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->_toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
464 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
467 decomposedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count so far, so an early return below reports only
// characters that were written in full.
468 *usedCharLen
= theUsedCharLen
;
470 for (idx
= 0;idx
< decomposedLen
;idx
++) {
471 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
472 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Split into a high/low surrogate pair.
475 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
476 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
477 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
480 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
482 *(characters
++) = charBuffer
[idx
];
486 if (maxCharLen
) *(characters
++) = character
;
490 processedByteLen
+= usedLen
;
494 *usedCharLen
= theUsedCharLen
;
495 return processedByteLen
;
/* Built-in registry entry for ASCII: encoding id, display name, up to four
 * charset name strings, and a script code of kCFStringEncodingMacRoman.
 * The NULL members are filled in later (presumably the converter and fallback
 * pointers assigned in __CFGetConverter - field order is not visible here). */
500 static _CFConverterEntry __CFConverterEntryASCII
= {
501 kCFStringEncodingASCII
, NULL
,
502 "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL
}, NULL
, NULL
, NULL
, NULL
,
503 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for ISO Latin 1: encoding id, display name, charset
 * name strings, and a script code of kCFStringEncodingMacRoman.
 * NOTE(review): the meaning of each NULL member depends on the
 * _CFConverterEntry layout, which is not visible here. */
506 static _CFConverterEntry __CFConverterEntryISOLatin1
= {
507 kCFStringEncodingISOLatin1
, NULL
,
508 "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL
}, NULL
, NULL
, NULL
, NULL
,
509 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for Mac OS Roman: encoding id, display name, charset
 * name strings, and a script code of kCFStringEncodingMacRoman.
 * NOTE(review): the meaning of each NULL member depends on the
 * _CFConverterEntry layout, which is not visible here. */
512 static _CFConverterEntry __CFConverterEntryMacRoman
= {
513 kCFStringEncodingMacRoman
, NULL
,
514 "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL
}, NULL
, NULL
, NULL
, NULL
,
515 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for Windows Latin 1 (code page 1252): encoding id,
 * display name, charset name strings, and a script code of
 * kCFStringEncodingMacRoman.
 * NOTE(review): the meaning of each NULL member depends on the
 * _CFConverterEntry layout, which is not visible here. */
518 static _CFConverterEntry __CFConverterEntryWinLatin1
= {
519 kCFStringEncodingWindowsLatin1
, NULL
,
520 "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL
}, NULL
, NULL
, NULL
, NULL
,
521 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for NextStep Latin: encoding id, display name, one
 * charset name string (remaining name slots are NULL), and a script code of
 * kCFStringEncodingMacRoman.
 * NOTE(review): the meaning of each NULL member depends on the
 * _CFConverterEntry layout, which is not visible here. */
524 static _CFConverterEntry __CFConverterEntryNextStepLatin
= {
525 kCFStringEncodingNextStepLatin
, NULL
,
526 "Western (NextStep)", {"x-nextstep", NULL
, NULL
, NULL
}, NULL
, NULL
, NULL
, NULL
,
527 kCFStringEncodingMacRoman
// We use string encoding's script range here
/* Built-in registry entry for UTF-8: encoding id, display name, two charset
 * name strings, and a script code of kCFStringEncodingUnicode (the other
 * built-in entries use kCFStringEncodingMacRoman).
 * NOTE(review): the meaning of each NULL member depends on the
 * _CFConverterEntry layout, which is not visible here. */
530 static _CFConverterEntry __CFConverterEntryUTF8
= {
531 kCFStringEncodingUTF8
, NULL
,
532 "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL
, NULL
}, NULL
, NULL
, NULL
, NULL
,
533 kCFStringEncodingUnicode
// We use string encoding's script range here
/* Maps an encoding id to its built-in registry entry.
 * kCFStringEncodingInvalidId shares the ASCII entry (it falls through to the
 * ASCII case).
 * NOTE(review): the switch header and the default/fall-off result are not
 * visible in this excerpt; callers in this file test the result for NULL. */
536 CF_INLINE _CFConverterEntry
*__CFStringEncodingConverterGetEntry(uint32_t encoding
) {
538 case kCFStringEncodingInvalidId
:
539 case kCFStringEncodingASCII
:
540 return &__CFConverterEntryASCII
;
542 case kCFStringEncodingISOLatin1
:
543 return &__CFConverterEntryISOLatin1
;
545 case kCFStringEncodingMacRoman
:
546 return &__CFConverterEntryMacRoman
;
548 case kCFStringEncodingWindowsLatin1
:
549 return &__CFConverterEntryWinLatin1
;
551 case kCFStringEncodingNextStepLatin
:
552 return &__CFConverterEntryNextStepLatin
;
554 case kCFStringEncodingUTF8
:
555 return &__CFConverterEntryUTF8
;
/* Builds the internal _CFEncodingConverter for a public converter definition.
 * Storage comes from a pool allocated NUM_OF_ENTRIES_CYCLE entries at a time
 * under _indexLock; a fresh chunk is allocated once the current one is used
 * up (the old chunk pointer is simply dropped, never freed here).
 * For the "Standard" class the definition's procs are installed directly and
 * _toBytes/_toUnicode stay NULL; for the other classes a wrapper is installed
 * in toBytes/toUnicode/toCanonicalUnicode and the definition's procs are kept
 * in _toBytes/_toUnicode. The TO_BYTE/TO_UNICODE macros rely on this split.
 * NOTE(review): the result of CFAllocatorAllocate is not checked in the
 * visible lines.
 * NOTE(review): `break`/`return` statements, the `else` between the two
 * `converter = ...` assignments and the final return are not visible in this
 * excerpt. */
563 CF_INLINE _CFEncodingConverter
*__CFEncodingConverterFromDefinition(const CFStringEncodingConverter
*definition
) {
564 #define NUM_OF_ENTRIES_CYCLE (10)
565 static CFSpinLock_t _indexLock
= CFSpinLockInit
;
566 static uint32_t _currentIndex
= 0;
567 static uint32_t _allocatedSize
= 0;
568 static _CFEncodingConverter
*_allocatedEntries
= NULL
;
569 _CFEncodingConverter
*converter
;
572 __CFSpinLock(&_indexLock
);
// Current chunk exhausted (or nothing allocated yet): start a new one.
573 if ((_currentIndex
+ 1) >= _allocatedSize
) {
576 _allocatedEntries
= NULL
;
578 if (_allocatedEntries
== NULL
) { // Not allocated yet
579 _allocatedEntries
= (_CFEncodingConverter
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(_CFEncodingConverter
) * NUM_OF_ENTRIES_CYCLE
, 0);
580 _allocatedSize
= NUM_OF_ENTRIES_CYCLE
;
581 converter
= &(_allocatedEntries
[_currentIndex
]);
583 converter
= &(_allocatedEntries
[++_currentIndex
]);
585 __CFSpinUnlock(&_indexLock
);
587 switch (definition
->encodingClass
) {
588 case kCFStringEncodingConverterStandard
:
589 converter
->toBytes
= (_CFToBytesProc
)definition
->toBytes
;
590 converter
->toUnicode
= (_CFToUnicodeProc
)definition
->toUnicode
;
// Standard converters use the same proc for canonical output.
591 converter
->toCanonicalUnicode
= (_CFToUnicodeProc
)definition
->toUnicode
;
592 converter
->_toBytes
= NULL
;
593 converter
->_toUnicode
= NULL
;
594 converter
->maxLen
= 2;
597 case kCFStringEncodingConverterCheapEightBit
:
598 converter
->toBytes
= __CFToBytesCheapEightBitWrapper
;
599 converter
->toUnicode
= __CFToUnicodeCheapEightBitWrapper
;
600 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapEightBitWrapper
;
601 converter
->_toBytes
= definition
->toBytes
;
602 converter
->_toUnicode
= definition
->toUnicode
;
603 converter
->maxLen
= 1;
606 case kCFStringEncodingConverterStandardEightBit
:
607 converter
->toBytes
= __CFToBytesStandardEightBitWrapper
;
608 converter
->toUnicode
= __CFToUnicodeStandardEightBitWrapper
;
609 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeStandardEightBitWrapper
;
610 converter
->_toBytes
= definition
->toBytes
;
611 converter
->_toUnicode
= definition
->toUnicode
;
// maxLen sizes the wrappers' staging buffer of decoded characters.
612 converter
->maxLen
= definition
->maxDecomposedCharLen
;
615 case kCFStringEncodingConverterCheapMultiByte
:
616 converter
->toBytes
= __CFToBytesCheapMultiByteWrapper
;
617 converter
->toUnicode
= __CFToUnicodeCheapMultiByteWrapper
;
618 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapMultiByteWrapper
;
619 converter
->_toBytes
= definition
->toBytes
;
620 converter
->_toUnicode
= definition
->toUnicode
;
// maxLen sizes the wrappers' staging buffer of encoded bytes.
621 converter
->maxLen
= definition
->maxBytesPerChar
;
624 case kCFStringEncodingConverterPlatformSpecific
:
// Platform-specific converters carry no procs at all.
625 converter
->toBytes
= NULL
;
626 converter
->toUnicode
= NULL
;
627 converter
->toCanonicalUnicode
= NULL
;
628 converter
->_toBytes
= NULL
;
629 converter
->_toUnicode
= NULL
;
630 converter
->maxLen
= 0;
631 converter
->toBytesLen
= NULL
;
632 converter
->toUnicodeLen
= NULL
;
633 converter
->toBytesFallback
= NULL
;
634 converter
->toUnicodeFallback
= NULL
;
635 converter
->toBytesPrecompose
= NULL
;
636 converter
->isValidCombiningChar
= NULL
;
639 default: // Shouldn't be here
// When the definition has no length proc, the per-unit maximum is stored in
// the function-pointer slot as a small integer; the length query functions
// in this file decode it by testing the value against 0xFFFF.
643 converter
->toBytesLen
= (definition
->toBytesLen
? definition
->toBytesLen
: (CFStringEncodingToBytesLenProc
)(uintptr_t)definition
->maxBytesPerChar
);
644 converter
->toUnicodeLen
= (definition
->toUnicodeLen
? definition
->toUnicodeLen
: (CFStringEncodingToUnicodeLenProc
)(uintptr_t)definition
->maxDecomposedCharLen
);
// Missing fallbacks default to the file's generic implementations.
645 converter
->toBytesFallback
= (definition
->toBytesFallback
? definition
->toBytesFallback
: __CFDefaultToBytesFallbackProc
);
646 converter
->toUnicodeFallback
= (definition
->toUnicodeFallback
? definition
->toUnicodeFallback
: __CFDefaultToUnicodeFallbackProc
);
647 converter
->toBytesPrecompose
= (definition
->toBytesPrecompose
? definition
->toBytesPrecompose
: NULL
);
648 converter
->isValidCombiningChar
= (definition
->isValidCombiningChar
? definition
->isValidCombiningChar
: NULL
);
/* Returns the built-in converter definition that matches entry->encoding, or
 * NULL when entry is NULL.
 * NOTE(review): the default case / fall-off result is not visible in this
 * excerpt. */
653 CF_INLINE
const CFStringEncodingConverter
*__CFStringEncodingConverterGetDefinition(_CFConverterEntry
*entry
) {
654 if (!entry
) return NULL
;
656 switch (entry
->encoding
) {
657 case kCFStringEncodingASCII
:
658 return &__CFConverterASCII
;
660 case kCFStringEncodingISOLatin1
:
661 return &__CFConverterISOLatin1
;
663 case kCFStringEncodingMacRoman
:
664 return &__CFConverterMacRoman
;
666 case kCFStringEncodingWindowsLatin1
:
667 return &__CFConverterWinLatin1
;
669 case kCFStringEncodingNextStepLatin
:
670 return &__CFConverterNextStepLatin
;
672 case kCFStringEncodingUTF8
:
673 return &__CFConverterUTF8
;
/* Returns the internal converter for `encoding`, creating and caching it in
 * the registry entry on first use. Returns NULL when the encoding has no
 * entry.
 * On first use the definition's fallback procs are also copied into the
 * entry.
 * NOTE(review): any NULL check on `definition` is not visible in this
 * excerpt, and the lazy initialization is not guarded by a lock in the
 * visible lines. */
680 static const _CFEncodingConverter
*__CFGetConverter(uint32_t encoding
) {
681 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
683 if (!entry
) return NULL
;
685 if (!entry
->converter
) {
686 const CFStringEncodingConverter
*definition
= __CFStringEncodingConverterGetDefinition(entry
);
689 entry
->converter
= __CFEncodingConverterFromDefinition(definition
);
690 entry
->toBytesFallback
= definition
->toBytesFallback
;
691 entry
->toUnicodeFallback
= definition
->toUnicodeFallback
;
695 return (_CFEncodingConverter
*)entry
->converter
;
/* Converts UTF-16 text to bytes in `encoding`.
 *   flags       - kCFStringEncoding* option bits (lossy conversion, canonical
 *                 forms, combining-character handling).
 *   characters  - input UTF-16 units; numChars is their count.
 *   usedCharLen - optional; receives the number of units consumed.
 *   bytes       - destination; maxByteLen is its capacity, 0 for "measure
 *                 only" (nothing is written).
 *   usedByteLen - optional; receives the number of bytes produced.
 * Returns kCFStringEncodingConversionSuccess,
 * kCFStringEncodingInsufficientOutputBufferLength,
 * kCFStringEncodingInvalidInputStream, or
 * kCFStringEncodingConverterUnavailable when no converter exists.
 * UTF-8 is handled up front: canonical flags go through CFUniCharDecompose,
 * everything else through the UTF-8 converter's toBytes proc, which is cached
 * in a function-local static.
 * Fix: both scans over runs of combining characters are now bounded. The
 * backward scan stops at index 0 instead of reading characters[-1], and the
 * forward scan stops at numChars instead of reading past the input, matching
 * the already-bounded scan in the kCFStringEncodingIgnoreCombinings branch.
 * NOTE(review): declarations of `usedLen` and `dummy`, several `else`/`break`
 * lines, closing braces and the final return are not visible in this
 * excerpt. */
700 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
701 if (encoding
== kCFStringEncodingUTF8
) {
702 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
703 CFIndex convertedCharLen
;
707 if ((flags
& kCFStringEncodingUseCanonical
) || (flags
& kCFStringEncodingUseHFSPlusCanonical
)) {
// Decompose straight into UTF-8 output.
708 (void)CFUniCharDecompose(characters
, numChars
, &convertedCharLen
, (void *)bytes
, maxByteLen
, &usedLen
, true, kCFUniCharUTF8Format
, (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false));
711 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
712 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
714 convertedCharLen
= __CFToUTF8(0, characters
, numChars
, bytes
, maxByteLen
, &usedLen
);
716 if (usedCharLen
) *usedCharLen
= convertedCharLen
;
717 if (usedByteLen
) *usedByteLen
= usedLen
;
719 if (convertedCharLen
== numChars
) {
720 return kCFStringEncodingConversionSuccess
;
721 } else if (maxByteLen
&& (maxByteLen
== usedLen
)) {
722 return kCFStringEncodingInsufficientOutputBufferLength
;
724 return kCFStringEncodingInvalidInputStream
;
727 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
729 CFIndex localUsedByteLen
;
730 CFIndex theUsedByteLen
= 0;
731 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
732 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose
= NULL
;
733 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar
= NULL
;
735 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// Select the combining-character helpers according to the flags.
737 if (flags
& kCFStringEncodingSubstituteCombinings
) {
738 if (!(flags
& kCFStringEncodingAllowLossyConversion
)) isValidCombiningChar
= converter
->isValidCombiningChar
;
740 isValidCombiningChar
= converter
->isValidCombiningChar
;
741 if (!(flags
& kCFStringEncodingIgnoreCombinings
)) {
742 toBytesPrecompose
= converter
->toBytesPrecompose
;
743 flags
|= kCFStringEncodingComposeCombinings
;
// Main loop: convert as much as possible, then deal with whatever stopped
// the converter (combining sequence, full buffer, unmappable character).
748 while ((usedLen
< numChars
) && (!maxByteLen
|| (theUsedByteLen
< maxByteLen
))) {
749 if ((usedLen
+= TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) < numChars
) {
752 if (isValidCombiningChar
&& (usedLen
> 0) && isValidCombiningChar(characters
[usedLen
])) {
753 if (toBytesPrecompose
) {
754 CFIndex localUsedLen
= usedLen
;
// Walk back to the base character of the combining sequence, never
// stepping below index 0.
756 while ((usedLen
> 0) && isValidCombiningChar(characters
[--usedLen
]));
757 theUsedByteLen
+= localUsedByteLen
;
758 if (converter
->maxLen
> 1) {
// Measure the bytes already emitted for the base and take them back.
759 TO_BYTE(converter
, flags
, characters
+ usedLen
, localUsedLen
- usedLen
, NULL
, 0, &localUsedByteLen
);
760 theUsedByteLen
-= localUsedByteLen
;
764 if ((localUsedLen
= toBytesPrecompose(flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) > 0) {
765 usedLen
+= localUsedLen
;
766 if ((usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
])) { // There is a non-base char not combined remaining
767 theUsedByteLen
+= localUsedByteLen
;
768 theResult
= kCFStringEncodingInvalidInputStream
;
771 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
772 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
// Skip the rest of the combining run without reading past the input.
775 while ((++usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
]));
776 localUsedByteLen
= 1;
777 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
780 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
783 theResult
= kCFStringEncodingInvalidInputStream
;
786 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
787 theUsedByteLen
+= localUsedByteLen
;
788 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
790 } else if (flags
& kCFStringEncodingIgnoreCombinings
) {
791 while ((++usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
]));
793 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
795 theUsedByteLen
+= localUsedByteLen
;
798 localUsedByteLen
= 1;
799 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
801 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
804 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
805 theUsedByteLen
+= localUsedByteLen
;
807 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
808 CFIndex localUsedLen
;
// Swallow trailing characters whose fallback produces no bytes.
810 localUsedByteLen
= 0;
811 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
813 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
815 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
816 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
818 theUsedByteLen
+= localUsedByteLen
;
821 localUsedByteLen
= 1;
822 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
824 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
827 theUsedByteLen
+= localUsedByteLen
;
828 theResult
= kCFStringEncodingInvalidInputStream
;
832 theUsedByteLen
+= localUsedByteLen
;
// The loop ended with input left over and no error recorded: the output
// buffer is full.
835 if (usedLen
< numChars
&& maxByteLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
836 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
837 CFIndex localUsedLen
;
839 localUsedByteLen
= 0;
840 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
842 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
844 if (usedByteLen
) *usedByteLen
= theUsedByteLen
;
845 if (usedCharLen
) *usedCharLen
= usedLen
;
/* Converts bytes in `encoding` to UTF-16.
 *   flags       - kCFStringEncoding* option bits (lossy conversion, canonical
 *                 decomposition).
 *   bytes       - input; numBytes is its length.
 *   usedByteLen - optional; receives the number of bytes consumed.
 *   characters  - destination; maxCharLen is its capacity, 0 for "measure
 *                 only".
 *   usedCharLen - optional; receives the number of UTF-16 units produced.
 * Returns kCFStringEncodingConversionSuccess,
 * kCFStringEncodingInsufficientOutputBufferLength,
 * kCFStringEncodingInvalidInputStream, or
 * kCFStringEncodingConverterUnavailable when no converter exists.
 * NOTE(review): the declaration of `usedLen`, some `else`/`break` lines,
 * closing braces and the final return are not visible in this excerpt. */
851 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
852 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
854 CFIndex theUsedCharLen
= 0;
855 CFIndex localUsedCharLen
;
856 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
858 if (!converter
) return kCFStringEncodingConverterUnavailable
;
861 while ((usedLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
862 if ((usedLen
+= TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
)) < numBytes
) {
863 CFIndex tempUsedCharLen
;
// The converter stopped early. Treat it as "buffer full" when the output is
// exactly full, or when a canonical flag is set or exactly one slot is left
// and the remaining input still converts in measure-only mode.
865 if (maxCharLen
&& ((maxCharLen
== theUsedCharLen
+ localUsedCharLen
) || (((flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
)) || (maxCharLen
== theUsedCharLen
+ localUsedCharLen
+ 1)) && TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, NULL
, 0, &tempUsedCharLen
)))) { // buffer was filled up
866 theUsedCharLen
+= localUsedCharLen
;
867 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
869 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
// Lossy mode: let the fallback proc consume the bytes that failed.
870 theUsedCharLen
+= localUsedCharLen
;
871 usedLen
+= TO_UNICODE_FALLBACK(converter
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
);
873 theUsedCharLen
+= localUsedCharLen
;
874 theResult
= kCFStringEncodingInvalidInputStream
;
878 theUsedCharLen
+= localUsedCharLen
;
// Input left over with no error recorded: the output buffer is full.
881 if (usedLen
< numBytes
&& maxCharLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
882 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
884 if (usedCharLen
) *usedCharLen
= theUsedCharLen
;
885 if (usedByteLen
) *usedByteLen
= usedLen
;
/* Returns true when CFStringEncodingGetConverter finds a converter for
 * `encoding`, false otherwise. */
890 __private_extern__
bool CFStringEncodingIsValidEncoding(uint32_t encoding
) {
891 return (CFStringEncodingGetConverter(encoding
) ? true : false);
894 __private_extern__
const char *CFStringEncodingName(uint32_t encoding
) {
895 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
896 if (entry
) return entry
->encodingName
;
900 __private_extern__
const char **CFStringEncodingCanonicalCharsetNames(uint32_t encoding
) {
901 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
902 if (entry
) return entry
->ianaNames
;
906 __private_extern__
uint32_t CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding
) {
907 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
909 return (entry
? entry
->scriptCode
: ((encoding
& 0x0FFF) == kCFStringEncodingUnicode
? kCFStringEncodingUnicode
: (encoding
< 0xFF ? encoding
: kCFStringEncodingInvalidId
)));
912 __private_extern__ CFIndex
CFStringEncodingCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
913 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
916 uintptr_t switchVal
= (uintptr_t)(converter
->toUnicodeLen
);
918 if (switchVal
< 0xFFFF) {
919 return switchVal
* numBytes
;
921 return converter
->toUnicodeLen(flags
, bytes
, numBytes
);
928 __private_extern__ CFIndex
CFStringEncodingByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
929 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
932 uintptr_t switchVal
= (uintptr_t)(converter
->toBytesLen
);
934 if (switchVal
< 0xFFFF) {
935 return switchVal
* numChars
;
937 return converter
->toBytesLen(flags
, characters
, numChars
);
944 __private_extern__
void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding
, CFStringEncodingToBytesFallbackProc toBytes
, CFStringEncodingToUnicodeFallbackProc toUnicode
) {
945 _CFConverterEntry
*entry
= __CFStringEncodingConverterGetEntry(encoding
);
947 if (entry
&& __CFGetConverter(encoding
)) {
948 ((_CFEncodingConverter
*)entry
->converter
)->toBytesFallback
= (toBytes
? toBytes
: entry
->toBytesFallback
);
949 ((_CFEncodingConverter
*)entry
->converter
)->toUnicodeFallback
= (toUnicode
? toUnicode
: entry
->toUnicodeFallback
);
953 __private_extern__
const CFStringEncodingConverter
*CFStringEncodingGetConverter(uint32_t encoding
) {
954 return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding
));
957 static const uint32_t __CFBuiltinEncodings
[] = {
958 kCFStringEncodingMacRoman
,
959 kCFStringEncodingWindowsLatin1
,
960 kCFStringEncodingISOLatin1
,
961 kCFStringEncodingNextStepLatin
,
962 kCFStringEncodingASCII
,
963 kCFStringEncodingUTF8
,
964 /* These seven are available only in CFString-level */
965 kCFStringEncodingNonLossyASCII
,
967 kCFStringEncodingUTF16
,
968 kCFStringEncodingUTF16BE
,
969 kCFStringEncodingUTF16LE
,
971 kCFStringEncodingUTF32
,
972 kCFStringEncodingUTF32BE
,
973 kCFStringEncodingUTF32LE
,
975 kCFStringEncodingInvalidId
,
979 __private_extern__
const uint32_t *CFStringEncodingListOfAvailableEncodings(void) {
980 return __CFBuiltinEncodings
;
987 #undef kSurrogateHighStart
988 #undef kSurrogateHighEnd
989 #undef kSurrogateLowStart
990 #undef kSurrogateLowEnd
991 #undef TO_BYTE_FALLBACK
992 #undef TO_UNICODE_FALLBACK
994 #undef NUM_OF_ENTRIES_CYCLE