2 * Copyright (c) 2009 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
23 /* CFStringEncodingConverter.c
24 Copyright (c) 1998-2009, Apple Inc. All rights reserved.
25 Responsibility: Aki Inoue
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFArray.h>
30 #include <CoreFoundation/CFDictionary.h>
31 #include "CFICUConverters.h"
32 #include <CoreFoundation/CFUniChar.h>
33 #include <CoreFoundation/CFPriv.h>
34 #include "CFUnicodeDecomposition.h"
35 #include "CFStringEncodingConverterExt.h"
36 #include "CFStringEncodingConverterPriv.h"
38 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
// Signatures of the internal wrapper entry points. Both take the owning converter as an
// opaque first argument, then the conversion flags, the input buffer with its length, the
// output buffer with its capacity, and an out-parameter that receives the amount of output
// used. The wrappers defined in this file return the amount of input they consumed.
42 typedef CFIndex (*_CFToBytesProc
)(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
);
43 typedef CFIndex (*_CFToUnicodeProc
)(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
);
// Fields of _CFEncodingConverter, the per-encoding dispatch record.
// definition: the CFStringEncodingConverter the record was built from.
46 const CFStringEncodingConverter
*definition
;
// toBytes: wrapper used by TO_BYTE; NULL means "call definition->toBytes directly".
// For ICU-class converters this slot instead carries the ICU converter name pointer.
47 _CFToBytesProc toBytes
;
// toUnicode: wrapper used by TO_UNICODE; NULL means "call definition->toUnicode directly".
48 _CFToUnicodeProc toUnicode
;
// toCanonicalUnicode: selected by TO_UNICODE instead of toUnicode when either
// kCFStringEncodingUseCanonical or kCFStringEncodingUseHFSPlusCanonical is set in flags.
49 _CFToUnicodeProc toCanonicalUnicode
;
// Fallback procs: taken from the definition when it provides them, otherwise the
// file-local default fallbacks.
50 CFStringEncodingToBytesFallbackProc toBytesFallback
;
51 CFStringEncodingToUnicodeFallbackProc toUnicodeFallback
;
52 } _CFEncodingConverter
;
// Converts UTF-16 to bytes through `conv`: uses the wrapper stored in conv->toBytes when it is
// non-NULL, otherwise calls the definition's toBytes directly (which takes no converter argument).
// NOTE(review): function-like macro; `conv` and `flags` are expanded unparenthesized and `conv`
// is evaluated more than once, so only pass side-effect-free expressions.
56 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->definition->toBytes)(flags,chars,numChars,bytes,max,used))
// Converts bytes to UTF-16 through `conv`. When conv->toUnicode is set, the canonical wrapper is
// chosen if either canonical flag is present, else the plain wrapper; when conv->toUnicode is NULL
// the definition's toUnicode is called directly.
// NOTE(review): toCanonicalUnicode is called without its own NULL check; it relies on being set
// whenever toUnicode is set.
57 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->toUnicode ? (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->definition->toUnicode)(flags,bytes,numBytes,chars,max,used))
59 #define ASCIINewLine 0x0a
// Inclusive UTF-16 surrogate ranges: high (lead) units are D800..DBFF, low (trail) units are DC00..DFFF.
60 #define kSurrogateHighStart 0xD800
61 #define kSurrogateHighEnd 0xDBFF
62 #define kSurrogateLowStart 0xDC00
63 #define kSurrogateLowEnd 0xDFFF
// Element count of the on-stack scratch buffers (charBuffer / byteBuffer) used by the wrappers in this file.
65 static const uint8_t __CFMaximumConvertedLength
= 20;
67 /* Mapping 160..255 (U+00A0..U+00FF) to lossy ASCII
// Lossy ASCII substitutes, one entry per code point in code-point order starting at
// U+00A0 (NO-BREAK SPACE); the lookup code indexes this table with (character - 0xA0).
// Each entry holds up to three ASCII bytes padded with zeros to four; an entry whose first
// byte is 0 presumably means "no ASCII substitute" (confirm against the lookup loop).
70 unsigned char chars
[4];
71 } _toLossyASCIITable
[] = {
72 {{' ', 0, 0, 0}}, // NO-BREAK SPACE
73 {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
74 {{'c', 0, 0, 0}}, // CENT SIGN
75 {{'L', 0, 0, 0}}, // POUND SIGN
76 {{'$', 0, 0, 0}}, // CURRENCY SIGN
77 {{'Y', 0, 0, 0}}, // YEN SIGN
78 {{'|', 0, 0, 0}}, // BROKEN BAR
79 {{0, 0, 0, 0}}, // SECTION SIGN
80 {{0, 0, 0, 0}}, // DIAERESIS
81 {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
82 {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
83 {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
84 {{0, 0, 0, 0}}, // NOT SIGN
85 {{'-', 0, 0, 0}}, // SOFT HYPHEN
86 {{'(', 'R', ')', 0}}, // REGISTERED SIGN
87 {{0, 0, 0, 0}}, // MACRON
88 {{0, 0, 0, 0}}, // DEGREE SIGN
89 {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
90 {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
91 {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
92 {{0, 0, 0, 0}}, // ACUTE ACCENT
93 {{0, 0, 0, 0}}, // MICRO SIGN
94 {{0, 0, 0, 0}}, // PILCROW SIGN
95 {{0, 0, 0, 0}}, // MIDDLE DOT
96 {{0, 0, 0, 0}}, // CEDILLA
97 {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
98 {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
99 {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
100 {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
101 {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
102 {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
103 {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
104 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
105 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
106 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
107 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
108 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
109 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
110 {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
111 {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
112 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
113 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
114 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
115 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
116 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
117 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
118 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
119 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
120 {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
121 {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
122 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
123 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
124 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
125 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
126 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
127 {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
128 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
129 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
130 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
131 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
132 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
133 {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
// NOTE(review): lowercase "th" is emitted for a CAPITAL letter; every other capital in this
// table maps to uppercase ASCII. Looks case-swapped with the SMALL LETTER ETH entry - confirm.
134 {{'t', 'h', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
135 {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
136 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
137 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
138 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
139 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
140 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
141 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
142 {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
143 {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
144 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
145 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
146 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
147 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
148 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
149 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
150 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
151 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
// NOTE(review): uppercase "TH" is emitted for a SMALL letter; every other small letter in this
// table maps to lowercase ASCII. Looks case-swapped with the CAPITAL LETTER THORN entry - confirm.
152 {{'T', 'H', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
153 {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
154 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
155 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
156 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
157 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
158 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
159 {{'/', 0, 0, 0}}, // DIVISION SIGN
160 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
161 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
162 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
163 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
164 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
165 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
166 {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
167 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
// Looks up the lossy-ASCII substitute for `character` in _toLossyASCIITable.
//   character  - code point to substitute; the table offset is (character - 0xA0) and no range
//                check is performed here, so the caller must pass a value the table covers.
//   bytes      - destination; only written when maxByteLen is non-zero.
//   maxByteLen - destination capacity; 0 means nothing is stored.
// At most four table bytes are examined (fewer when maxByteLen is non-zero and below 4).
170 CF_INLINE CFIndex
__CFToASCIILatin1Fallback(UniChar character
, uint8_t *bytes
, CFIndex maxByteLen
) {
// Address the table as a flat byte array: four bytes per entry.
171 const uint8_t *losChars
= (const uint8_t*)_toLossyASCIITable
+ (character
- 0xA0) * sizeof(uint8_t[4]);
172 CFIndex numBytes
= 0;
173 CFIndex idx
, max
= (maxByteLen
&& (maxByteLen
< 4) ? maxByteLen
: 4);
175 for (idx
= 0;idx
< max
;idx
++) {
177 if (maxByteLen
) bytes
[idx
] = losChars
[idx
];
// Default lossy fallback, installed when a converter definition supplies no toBytesFallback.
// Produces a substitute for the character at *characters.
//   characters/numChars - remaining UTF-16 input; only the first one or two units are inspected.
//   bytes/maxByteLen    - destination and capacity; nothing is stored when maxByteLen is 0.
//   usedByteLen         - receives the number of substitute bytes produced.
// Returns the number of UTF-16 units consumed (1, or 2 for a surrogate pair).
187 static CFIndex
__CFDefaultToBytesFallbackProc(const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
188 CFIndex processCharLen
= 1, filledBytesLen
= 1;
191 if (*characters
< 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
192 byte
= (uint8_t)(*characters
- 0x80);
193 } else if (*characters
< 0x100) {
// 0xA0..0xFF: use the lossy ASCII table.
194 *usedByteLen
= __CFToASCIILatin1Fallback(*characters
, bytes
, maxByteLen
);
196 } else if (*characters
>= kSurrogateHighStart
&& *characters
<= kSurrogateLowEnd
) {
// Consume both units when a second unit in the low-surrogate range follows.
// NOTE(review): the first-unit test is `<= kSurrogateLowStart`, which also admits 0xDC00
// (itself a low surrogate); `< kSurrogateLowStart` looks intended - confirm.
197 processCharLen
= (numChars
> 1 && *characters
<= kSurrogateLowStart
&& *(characters
+ 1) >= kSurrogateLowStart
&& *(characters
+ 1) <= kSurrogateLowEnd
? 2 : 1);
198 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceCharacterSet
)) {
200 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceAndNewlineCharacterSet
)) {
202 } else if (*characters
== 0x2026) { // ellipsis
203 if (0 == maxByteLen
) {
205 } else if (maxByteLen
> 2) {
// Room for all three bytes: expand the ellipsis to "...".
206 memset(bytes
, '.', 3);
208 return processCharLen
;
210 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharDecomposableCharacterSet
)) {
211 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
// Only the first code point of the decomposition (the base character) is used.
213 (void)CFUniCharDecomposeCharacter(*characters
, decomposed
, MAX_DECOMPOSED_LENGTH
);
214 if (*decomposed
< 0x80) {
215 byte
= (uint8_t)(*decomposed
);
// Non-ASCII base: run the fallback again on the base character alone.
// NOTE(review): the UTF32Char base is narrowed to a single UTF16Char here.
217 UTF16Char theChar
= *decomposed
;
219 return __CFDefaultToBytesFallbackProc(&theChar
, 1, bytes
, maxByteLen
, usedByteLen
);
// Common exit for the single-byte cases.
223 if (maxByteLen
) *bytes
= byte
;
224 *usedByteLen
= filledBytesLen
;
225 return processCharLen
;
// Default fallback for undecodable bytes, installed when a converter definition supplies no
// toUnicodeFallback. Stores '?' as the replacement character when an output buffer is
// present (maxCharLen non-zero).
228 static CFIndex
__CFDefaultToUnicodeFallbackProc(const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
229 if (maxCharLen
) *characters
= (UniChar
)'?';
// Invoke the converter's fallback procs. Unlike TO_BYTE/TO_UNICODE these take neither the
// converter nor the flags as arguments.
// NOTE(review): `conv` is expanded unparenthesized; pass a plain identifier.
234 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
235 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
237 #define EXTRA_BASE (0x0F00)
239 /* Wrapper funcs for non-standard converters
// Adapts a "cheap eight-bit" definition (one UTF-16 unit in, one byte out per callback call)
// to the buffer-oriented _CFToBytesProc signature.
//   converter - the _CFEncodingConverter whose definition->toBytes is the per-character callback.
//   maxByteLen - output capacity; 0 means "count only", and nothing is stored in `bytes`.
// Stops at the first character the callback rejects. Because the mapping is 1:1, the same count
// is reported through *usedByteLen and as the return value (characters consumed).
241 static CFIndex
__CFToBytesCheapEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
242 CFIndex processedCharLen
= 0;
// Number of characters to attempt: limited by the output capacity when one is given.
243 CFIndex length
= (maxByteLen
&& (maxByteLen
< numChars
) ? maxByteLen
: numChars
);
246 while (processedCharLen
< length
) {
247 if (!((CFStringEncodingCheapEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], &byte
)) break;
249 if (maxByteLen
) bytes
[processedCharLen
] = byte
;
253 *usedByteLen
= processedCharLen
;
254 return processedCharLen
;
// Adapts a "cheap eight-bit" definition (one byte in, one UTF-16 unit out per callback call)
// to the buffer-oriented _CFToUnicodeProc signature.
//   converter  - the _CFEncodingConverter whose definition->toUnicode is the per-byte callback.
//   maxCharLen - output capacity; 0 means "count only", and nothing is stored in `characters`.
// Stops at the first byte the callback rejects. The same count is reported through *usedCharLen
// and as the return value (bytes consumed).
257 static CFIndex
__CFToUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
258 CFIndex processedByteLen
= 0;
// Number of bytes to attempt: limited by the output capacity when one is given.
259 CFIndex length
= (maxCharLen
&& (maxCharLen
< numBytes
) ? maxCharLen
: numBytes
);
262 while (processedByteLen
< length
) {
263 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
265 if (maxCharLen
) characters
[processedByteLen
] = character
;
269 *usedCharLen
= processedByteLen
;
270 return processedByteLen
;
// Canonical-decomposition variant of the cheap eight-bit to-Unicode wrapper: each byte is
// converted with the definition's per-byte callback and, when the result is decomposable
// (HFS+ rules if kCFStringEncodingUseHFSPlusCanonical is set), its decomposition is emitted
// instead. Decomposed code points above 0xFFFF are written as surrogate pairs.
// Returns the number of bytes consumed; *usedCharLen receives the UTF-16 units produced.
273 static CFIndex
__CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
274 CFIndex processedByteLen
= 0;
275 CFIndex theUsedCharLen
= 0;
276 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
279 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
// maxCharLen == 0 is treated as "no output limit" by this loop condition.
281 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
282 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
284 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
287 usedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far, so it is already correct if one of the capacity checks
// below returns early in the middle of this decomposition.
288 *usedCharLen
= theUsedCharLen
;
290 for (idx
= 0;idx
< usedLen
;idx
++) {
291 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// NOTE(review): with maxCharLen == 0 this comparison is always true, so a decomposable
// character ends a count-only pass early, unlike the loop condition above - confirm intent.
292 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Split the supplementary code point into a high/low surrogate pair.
295 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
296 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
297 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
300 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
302 *(characters
++) = charBuffer
[idx
];
// Not decomposable: pass the character through unchanged.
306 if (maxCharLen
) *(characters
++) = character
;
312 *usedCharLen
= theUsedCharLen
;
313 return processedByteLen
;
// Adapts a "standard eight-bit" definition to the _CFToBytesProc signature. The definition's
// callback is handed the remaining characters and produces one byte per call, returning how
// many UTF-16 units it consumed for that byte (0 stops the conversion).
//   maxByteLen - output capacity; 0 means "count only", and nothing is stored in `bytes`.
// *usedByteLen is used as the running output count. Returns the characters consumed.
316 static CFIndex
__CFToBytesStandardEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
317 CFIndex processedCharLen
= 0;
323 while (numChars
&& (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
324 if (!(usedLen
= ((CFStringEncodingStandardEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
, numChars
, &byte
))) break;
326 if (maxByteLen
) bytes
[*usedByteLen
] = byte
;
// Advance the input window by however many units the callback consumed.
328 characters
+= usedLen
;
330 processedCharLen
+= usedLen
;
333 return processedCharLen
;
// Adapts a "standard eight-bit" definition to the _CFToUnicodeProc signature. Each input byte
// is converted into a scratch buffer (one byte may yield several UTF-16 units), which is then
// copied to `characters` if it fits.
// *usedCharLen is used as the running output count. Returns the bytes consumed.
336 static CFIndex
__CFToUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
337 CFIndex processedByteLen
= 0;
338 UniChar charBuffer
[__CFMaximumConvertedLength
];
343 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
// NOTE(review): the callback is invoked through the CheapEightBit proc type although this is
// the StandardEightBit wrapper and the result is used as a length (usedLen) with an array
// destination; confirm the cast matches the definition's real signature.
344 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
// Stop before a conversion result that would not fit entirely in the output buffer.
349 if (*usedCharLen
+ usedLen
> maxCharLen
) break;
351 for (idx
= 0;idx
< usedLen
;idx
++) {
352 characters
[*usedCharLen
+ idx
] = charBuffer
[idx
];
355 *usedCharLen
+= usedLen
;
359 return processedByteLen
;
// Canonical-decomposition variant of the standard eight-bit to-Unicode wrapper: each byte is
// converted into charBuffer, then every resulting character that is decomposable (HFS+ rules
// if kCFStringEncodingUseHFSPlusCanonical is set) is decomposed into decompBuffer before output.
// Returns the number of bytes consumed; *usedCharLen receives the UTF-16 units produced.
362 static CFIndex
__CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
363 CFIndex processedByteLen
= 0;
364 UniChar charBuffer
[__CFMaximumConvertedLength
];
365 UTF32Char decompBuffer
[MAX_DECOMPOSED_LENGTH
];
368 CFIndex idx
, decompIndex
;
369 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
370 CFIndex theUsedCharLen
= 0;
// maxCharLen == 0 is treated as "no output limit" by this loop condition.
372 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// NOTE(review): invoked through the CheapEightBit proc type although the result is used as a
// length and the destination is an array; confirm the cast matches the definition's signature.
373 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
375 for (idx
= 0;idx
< usedLen
;idx
++) {
376 if (CFUniCharIsDecomposableCharacter(charBuffer
[idx
], isHFSPlus
)) {
377 decompedLen
= CFUniCharDecomposeCharacter(charBuffer
[idx
], decompBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far in case a capacity check below returns early.
378 *usedCharLen
= theUsedCharLen
;
380 for (decompIndex
= 0;decompIndex
< decompedLen
;decompIndex
++) {
381 if (decompBuffer
[decompIndex
] > 0xFFFF) { // Non-BMP
382 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// NOTE(review): this loop tests decompBuffer[decompIndex] but reads and writes
// charBuffer[idx] (the undecomposed character) in both branches, so the decomposition
// result never reaches the output. The sibling canonical wrappers emit from the
// decomposition buffer; decompBuffer[decompIndex] looks intended here - confirm.
385 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
386 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
387 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
390 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
392 *(characters
++) = charBuffer
[idx
];
// Not decomposable: pass the character through unchanged.
396 if (maxCharLen
) *(characters
++) = charBuffer
[idx
];
403 *usedCharLen
= theUsedCharLen
;
404 return processedByteLen
;
// Adapts a "cheap multi-byte" definition to the _CFToBytesProc signature. The definition's
// callback converts one UTF-16 unit at a time into a scratch byte buffer and returns the number
// of bytes produced (0 stops the conversion); the bytes are then copied out if they fit.
// *usedByteLen is used as the running output count. Returns the characters consumed.
407 static CFIndex
__CFToBytesCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
408 CFIndex processedCharLen
= 0;
409 uint8_t byteBuffer
[__CFMaximumConvertedLength
];
414 while ((processedCharLen
< numChars
) && (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
415 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], byteBuffer
))) break;
// Stop before a byte sequence that would not fit entirely in the output buffer.
420 if (*usedByteLen
+ usedLen
> maxByteLen
) break;
422 for (idx
= 0;idx
<usedLen
;idx
++) {
423 bytes
[*usedByteLen
+ idx
] = byteBuffer
[idx
];
427 *usedByteLen
+= usedLen
;
431 return processedCharLen
;
// Adapts a "cheap multi-byte" definition to the _CFToUnicodeProc signature. The definition's
// callback is handed the remaining bytes, produces one UTF-16 unit per call and returns how
// many bytes it consumed for it (0 stops the conversion).
//   maxCharLen - output capacity; 0 means "count only", and nothing is stored in `characters`.
// *usedCharLen is used as the running output count. Returns the bytes consumed.
434 static CFIndex
__CFToUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
435 CFIndex processedByteLen
= 0;
441 while (numBytes
&& (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
442 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
444 if (maxCharLen
) *(characters
++) = character
;
446 processedByteLen
+= usedLen
;
451 return processedByteLen
;
// Canonical-decomposition variant of the cheap multi-byte to-Unicode wrapper: each character
// produced by the definition's callback is, when decomposable (HFS+ rules if
// kCFStringEncodingUseHFSPlusCanonical is set), replaced by its decomposition. Decomposed code
// points above 0xFFFF are written as surrogate pairs.
// Returns the number of bytes consumed; *usedCharLen receives the UTF-16 units produced.
454 static CFIndex
__CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
455 CFIndex processedByteLen
= 0;
456 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
459 CFIndex decomposedLen
;
460 CFIndex theUsedCharLen
= 0;
461 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
// maxCharLen == 0 is treated as "no output limit" by this loop condition.
463 while (numBytes
&& (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
464 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
466 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
469 decomposedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far in case a capacity check below returns early.
470 *usedCharLen
= theUsedCharLen
;
472 for (idx
= 0;idx
< decomposedLen
;idx
++) {
473 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// NOTE(review): with maxCharLen == 0 this comparison is always true, so a decomposable
// character ends a count-only pass early, unlike the loop condition above - confirm intent.
474 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Split the supplementary code point into a high/low surrogate pair.
477 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
478 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
479 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
482 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
484 *(characters
++) = charBuffer
[idx
];
// Not decomposable: pass the character through unchanged.
488 if (maxCharLen
) *(characters
++) = character
;
492 processedByteLen
+= usedLen
;
496 *usedCharLen
= theUsedCharLen
;
497 return processedByteLen
;
// Builds the _CFEncodingConverter dispatch record for `definition`.
// Records are carved out of chunks of NUM_OF_ENTRIES_CYCLE entries obtained from
// kCFAllocatorSystemDefault; the chunk bookkeeping lives in function-local statics, so callers
// are expected to serialize calls (NOTE(review): no locking is done in this function itself).
//   definition - converter definition to wrap; its encodingClass selects the wrapper set.
//   encoding   - only used for ICU-class converters, to look up the ICU converter name.
502 CF_INLINE _CFEncodingConverter
*__CFEncodingConverterFromDefinition(const CFStringEncodingConverter
*definition
, CFStringEncoding encoding
) {
503 #define NUM_OF_ENTRIES_CYCLE (10)
504 static uint32_t _currentIndex
= 0;
505 static uint32_t _allocatedSize
= 0;
506 static _CFEncodingConverter
*_allocatedEntries
= NULL
;
507 _CFEncodingConverter
*converter
;
// Current chunk exhausted (or none yet): drop the chunk pointer so a fresh chunk is allocated
// below. The old chunk is not freed here; records handed out from it stay valid.
510 if ((_currentIndex
+ 1) >= _allocatedSize
) {
513 _allocatedEntries
= NULL
;
515 if (_allocatedEntries
== NULL
) { // Not allocated yet
// NOTE(review): no NULL check on the allocation result is visible before it is used.
516 _allocatedEntries
= (_CFEncodingConverter
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(_CFEncodingConverter
) * NUM_OF_ENTRIES_CYCLE
, 0);
517 _allocatedSize
= NUM_OF_ENTRIES_CYCLE
;
518 converter
= &(_allocatedEntries
[_currentIndex
]);
520 converter
= &(_allocatedEntries
[++_currentIndex
]);
523 memset(converter
, 0, sizeof(_CFEncodingConverter
));
525 converter
->definition
= definition
;
527 switch (definition
->encodingClass
) {
// Standard converters already have the buffer-oriented signature; NULL wrappers make
// TO_BYTE / TO_UNICODE call the definition's procs directly.
528 case kCFStringEncodingConverterStandard
:
529 converter
->toBytes
= NULL
;
530 converter
->toUnicode
= NULL
;
531 converter
->toCanonicalUnicode
= NULL
;
534 case kCFStringEncodingConverterCheapEightBit
:
535 converter
->toBytes
= __CFToBytesCheapEightBitWrapper
;
536 converter
->toUnicode
= __CFToUnicodeCheapEightBitWrapper
;
537 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapEightBitWrapper
;
540 case kCFStringEncodingConverterStandardEightBit
:
541 converter
->toBytes
= __CFToBytesStandardEightBitWrapper
;
542 converter
->toUnicode
= __CFToUnicodeStandardEightBitWrapper
;
543 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeStandardEightBitWrapper
;
546 case kCFStringEncodingConverterCheapMultiByte
:
547 converter
->toBytes
= __CFToBytesCheapMultiByteWrapper
;
548 converter
->toUnicode
= __CFToUnicodeCheapMultiByteWrapper
;
549 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapMultiByteWrapper
;
// ICU converters: the toBytes slot is reused to carry the ICU converter name. It is never
// called as a function; the public entry points cast it back to const char *.
552 case kCFStringEncodingConverterICU
:
553 converter
->toBytes
= (_CFToBytesProc
)__CFStringEncodingGetICUName(encoding
);
556 case kCFStringEncodingConverterPlatformSpecific
:
559 default: // Shouldn't be here
// Use the definition's fallback procs when present, else the file-local defaults.
563 converter
->toBytesFallback
= (definition
->toBytesFallback
? definition
->toBytesFallback
: __CFDefaultToBytesFallbackProc
);
564 converter
->toUnicodeFallback
= (definition
->toUnicodeFallback
? definition
->toUnicodeFallback
: __CFDefaultToUnicodeFallbackProc
);
// Maps an encoding to its converter definition: the built-in definitions for UTF-8, MacRoman,
// Windows Latin 1, ASCII, ISO Latin 1 and NextStep Latin are returned directly; every other
// encoding is delegated to __CFStringEncodingGetExternalConverter().
569 CF_INLINE
const CFStringEncodingConverter
*__CFStringEncodingConverterGetDefinition(CFStringEncoding encoding
) {
571 case kCFStringEncodingUTF8
:
572 return &__CFConverterUTF8
;
574 case kCFStringEncodingMacRoman
:
575 return &__CFConverterMacRoman
;
577 case kCFStringEncodingWindowsLatin1
:
578 return &__CFConverterWinLatin1
;
580 case kCFStringEncodingASCII
:
581 return &__CFConverterASCII
;
583 case kCFStringEncodingISOLatin1
:
584 return &__CFConverterISOLatin1
;
587 case kCFStringEncodingNextStepLatin
:
588 return &__CFConverterNextStepLatin
;
592 return __CFStringEncodingGetExternalConverter(encoding
);
// Returns the cached _CFEncodingConverter for `encoding`, creating and caching it on first use.
// Three fixed slots hold the most common converters (UTF-8, the platform's Roman/Latin 1
// encoding, and the system encoding); everything else is kept in a dictionary keyed by the
// encoding value. The cache is protected by a function-local spin lock.
// The result stays NULL when no definition exists for the encoding.
596 static const _CFEncodingConverter
*__CFGetConverter(uint32_t encoding
) {
597 const _CFEncodingConverter
*converter
= NULL
;
598 const _CFEncodingConverter
**commonConverterSlot
= NULL
;
599 static _CFEncodingConverter
*commonConverters
[3] = {NULL
, NULL
, NULL
}; // UTF8, MacRoman/WinLatin1, and the default encoding*
600 static CFMutableDictionaryRef mappingTable
= NULL
;
601 static CFSpinLock_t lock
= CFSpinLockInit
;
604 case kCFStringEncodingUTF8
: commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[0]); break;
606 /* the switch here should avoid possible bootstrap issues in the default: case below when invoked from CFStringGetSystemEncoding() */
607 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
608 case kCFStringEncodingMacRoman
: commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[1]); break;
609 #elif DEPLOYMENT_TARGET_WINDOWS
610 case kCFStringEncodingWindowsLatin1
: commonConverterSlot
= (const _CFEncodingConverter
**)(&(commonConverters
[1])); break;
612 #warning This case must match __defaultEncoding value defined in CFString.c
613 case kCFStringEncodingISOLatin1
: commonConverterSlot
= (const _CFEncodingConverter
**)(&(commonConverters
[1])); break;
614 #endif /* DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED */
616 default: if (CFStringGetSystemEncoding() == encoding
) commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[2]); break;
// First lookup: fixed slot if one applies, otherwise the dictionary (if it exists yet).
620 converter
= ((NULL
== commonConverterSlot
) ? ((NULL
== mappingTable
) ? NULL
: (const _CFEncodingConverter
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
);
621 __CFSpinUnlock(&lock
);
// Cache miss: the definition is resolved after the lock was released above.
623 if (NULL
== converter
) {
624 const CFStringEncodingConverter
*definition
= __CFStringEncodingConverterGetDefinition(encoding
);
626 if (NULL
!= definition
) {
// Second lookup: another thread may have cached the converter while the lock was released
// (presumably the lock is re-taken just before this point - confirm).
628 converter
= ((NULL
== commonConverterSlot
) ? ((NULL
== mappingTable
) ? NULL
: (const _CFEncodingConverter
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
);
630 if (NULL
== converter
) {
631 converter
= __CFEncodingConverterFromDefinition(definition
, encoding
);
633 if (NULL
== commonConverterSlot
) {
// Dictionary is created lazily with NULL key/value callbacks: keys are the encoding value
// cast to a pointer and values are the raw converter pointers.
634 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, NULL
);
636 CFDictionarySetValue(mappingTable
, (const void *)(uintptr_t)encoding
, converter
);
638 *commonConverterSlot
= converter
;
641 __CFSpinUnlock(&lock
);
// Converts UTF-16 text to bytes in `encoding`.
//   encoding    - target encoding.
//   flags       - kCFStringEncoding* option bits (lossy conversion, combining-character
//                 handling, canonical decomposition for UTF-8, ...).
//   characters/numChars - input text.
//   usedCharLen - optional; receives the number of input units consumed.
//   bytes/maxByteLen    - output buffer and capacity; maxByteLen == 0 means "measure only".
//   usedByteLen - optional; receives the number of bytes produced.
// Returns kCFStringEncodingConversionSuccess, kCFStringEncodingInvalidInputStream,
// kCFStringEncodingInsufficientOutputBufferLength or kCFStringEncodingConverterUnavailable.
650 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
// UTF-8 fast path: bypasses the converter table entirely.
651 if (encoding
== kCFStringEncodingUTF8
) {
652 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
653 CFIndex convertedCharLen
;
// Canonical output requested: decompose straight into UTF-8.
657 if ((flags
& kCFStringEncodingUseCanonical
) || (flags
& kCFStringEncodingUseHFSPlusCanonical
)) {
658 (void)CFUniCharDecompose(characters
, numChars
, &convertedCharLen
, (void *)bytes
, maxByteLen
, &usedLen
, true, kCFUniCharUTF8Format
, (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false));
// Otherwise use the UTF-8 converter's toBytes proc, cached in a function-local static.
661 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
662 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
664 convertedCharLen
= __CFToUTF8(0, characters
, numChars
, bytes
, maxByteLen
, &usedLen
);
666 if (usedCharLen
) *usedCharLen
= convertedCharLen
;
667 if (usedByteLen
) *usedByteLen
= usedLen
;
// Classify a short conversion: with fewer than 10 bytes of room left it is treated as a full
// buffer unless the character it stopped on is an unpaired surrogate; otherwise it is
// reported as invalid input.
669 if (convertedCharLen
== numChars
) {
670 return kCFStringEncodingConversionSuccess
;
671 } else if ((maxByteLen
> 0) && ((maxByteLen
- usedLen
) < 10)) { // could be filled outbuf
672 UTF16Char character
= characters
[convertedCharLen
];
// Lone low surrogate, or high surrogate not followed by a low surrogate.
674 if (((character
>= kSurrogateLowStart
) && (character
<= kSurrogateLowEnd
)) || ((character
>= kSurrogateHighStart
) && (character
<= kSurrogateHighEnd
) && ((1 == (numChars
- convertedCharLen
)) || (characters
[convertedCharLen
+ 1] < kSurrogateLowStart
) || (characters
[convertedCharLen
+ 1] > kSurrogateLowEnd
)))) return kCFStringEncodingInvalidInputStream
;
676 return kCFStringEncodingInsufficientOutputBufferLength
;
678 return kCFStringEncodingInvalidInputStream
;
// General path: dispatch through the cached converter record.
681 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
683 CFIndex localUsedByteLen
;
684 CFIndex theUsedByteLen
= 0;
685 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
686 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose
= NULL
;
687 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar
= NULL
;
689 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// Pick the combining-character helpers according to the flags. Precomposition is only
// enabled when combining characters are not being ignored.
691 if (flags
& kCFStringEncodingSubstituteCombinings
) {
692 if (!(flags
& kCFStringEncodingAllowLossyConversion
)) isValidCombiningChar
= converter
->definition
->isValidCombiningChar
;
694 isValidCombiningChar
= converter
->definition
->isValidCombiningChar
;
695 if (!(flags
& kCFStringEncodingIgnoreCombinings
)) {
696 toBytesPrecompose
= converter
->definition
->toBytesPrecompose
;
697 flags
|= kCFStringEncodingComposeCombinings
;
// ICU converters: the toBytes slot carries the ICU converter name, not a function.
701 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToBytes((const char *)converter
->toBytes
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
);
703 /* Platform converter */
704 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformUnicodeToBytes(encoding
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
);
// Main loop: convert as much as possible, then work out why the converter stopped early
// (combining sequence, full buffer, or unconvertible character) and recover if allowed.
706 while ((usedLen
< numChars
) && (!maxByteLen
|| (theUsedByteLen
< maxByteLen
))) {
707 if ((usedLen
+= TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) < numChars
) {
// Stopped on a combining character that follows already-converted text.
710 if (isValidCombiningChar
&& (usedLen
> 0) && isValidCombiningChar(characters
[usedLen
])) {
711 if (toBytesPrecompose
) {
712 CFIndex localUsedLen
= usedLen
;
// Back up to the base character of the combining sequence.
// NOTE(review): this scan has no visible lower bound on usedLen - confirm it cannot
// run past the start of the buffer when the text begins with combining characters.
714 while (isValidCombiningChar(characters
[--usedLen
]));
715 theUsedByteLen
+= localUsedByteLen
;
// For multi-byte encodings, measure the bytes already emitted for the base character so
// they can be taken back before the precomposed form is written.
716 if (converter
->definition
->maxBytesPerChar
> 1) {
717 TO_BYTE(converter
, flags
, characters
+ usedLen
, localUsedLen
- usedLen
, NULL
, 0, &localUsedByteLen
);
718 theUsedByteLen
-= localUsedByteLen
;
722 if ((localUsedLen
= toBytesPrecompose(flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) > 0) {
723 usedLen
+= localUsedLen
;
724 if ((usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
])) { // There is a non-base char not combined remaining
725 theUsedByteLen
+= localUsedByteLen
;
726 theResult
= kCFStringEncodingInvalidInputStream
;
729 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
730 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
// Skip the rest of the combining sequence.
// NOTE(review): unlike the IgnoreCombinings scan further down, this loop does not check
// usedLen against numChars before reading characters[usedLen] - confirm.
733 while (isValidCombiningChar(characters
[++usedLen
]));
734 localUsedByteLen
= 1;
735 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
738 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
741 theResult
= kCFStringEncodingInvalidInputStream
;
// The output is exactly full, or a measure-only retry shows the next character is convertible:
// the stop was caused by the buffer, not by the input.
744 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
745 theUsedByteLen
+= localUsedByteLen
;
746 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
748 } else if (flags
& kCFStringEncodingIgnoreCombinings
) {
749 while ((++usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
]));
751 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
753 theUsedByteLen
+= localUsedByteLen
;
756 localUsedByteLen
= 1;
757 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
759 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
762 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
763 theUsedByteLen
+= localUsedByteLen
;
// Lossy conversion with no explicit lossy byte: consume trailing characters whose fallback
// produces no bytes, so a full buffer is only reported when real output remains.
765 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
766 CFIndex localUsedLen
;
768 localUsedByteLen
= 0;
769 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
771 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Unconvertible character with lossy conversion allowed: substitute and continue.
773 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
774 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
776 theUsedByteLen
+= localUsedByteLen
;
779 localUsedByteLen
= 1;
780 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
782 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
785 theUsedByteLen
+= localUsedByteLen
;
786 theResult
= kCFStringEncodingInvalidInputStream
;
790 theUsedByteLen
+= localUsedByteLen
;
// The loop ended with input left over and no error recorded, i.e. the output buffer ran out.
793 if (usedLen
< numChars
&& maxByteLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
794 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
795 CFIndex localUsedLen
;
797 localUsedByteLen
= 0;
798 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
800 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
802 if (usedByteLen
) *usedByteLen
= theUsedByteLen
;
803 if (usedCharLen
) *usedCharLen
= usedLen
;
// Converts bytes in `encoding` to UTF-16 text.
//   encoding    - source encoding.
//   flags       - kCFStringEncoding* option bits (lossy conversion, canonical decomposition, ...).
//   bytes/numBytes - input data.
//   usedByteLen - optional; receives the number of input bytes consumed.
//   characters/maxCharLen - output buffer and capacity; maxCharLen == 0 means "measure only".
//   usedCharLen - optional; receives the number of UTF-16 units produced.
// Returns kCFStringEncodingConversionSuccess, kCFStringEncodingInvalidInputStream,
// kCFStringEncodingInsufficientOutputBufferLength or kCFStringEncodingConverterUnavailable.
809 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
810 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
812 CFIndex theUsedCharLen
= 0;
813 CFIndex localUsedCharLen
;
814 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
816 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// ICU converters: the toBytes slot carries the ICU converter name, not a function.
818 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToUnicode((const char *)converter
->toBytes
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
);
820 /* Platform converter */
821 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformBytesToUnicode(encoding
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
);
// Main loop: convert as much as possible, then decide whether an early stop was caused by a
// full output buffer or by undecodable input.
823 while ((usedLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
824 if ((usedLen
+= TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
)) < numBytes
) {
825 CFIndex tempUsedCharLen
;
// Buffer-full test: the output is exactly full, or (when decomposing, or with one slot left)
// a measure-only retry shows the next bytes are decodable and simply need more room.
827 if (maxCharLen
&& ((maxCharLen
== theUsedCharLen
+ localUsedCharLen
) || (((flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
)) || (maxCharLen
== theUsedCharLen
+ localUsedCharLen
+ 1)) && TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, NULL
, 0, &tempUsedCharLen
)))) { // buffer was filled up
828 theUsedCharLen
+= localUsedCharLen
;
829 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Undecodable input with lossy conversion allowed: let the fallback proc substitute.
831 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
832 theUsedCharLen
+= localUsedCharLen
;
833 usedLen
+= TO_UNICODE_FALLBACK(converter
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
);
835 theUsedCharLen
+= localUsedCharLen
;
836 theResult
= kCFStringEncodingInvalidInputStream
;
840 theUsedCharLen
+= localUsedCharLen
;
// The loop ended with input left over and no error recorded, i.e. the output buffer ran out.
843 if (usedLen
< numBytes
&& maxCharLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
844 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
846 if (usedCharLen
) *usedCharLen
= theUsedCharLen
;
847 if (usedByteLen
) *usedByteLen
= usedLen
;
// Reports whether a converter definition can be obtained for `encoding`:
// true when CFStringEncodingGetConverter() returns non-NULL, false otherwise.
852 __private_extern__
bool CFStringEncodingIsValidEncoding(uint32_t encoding
) {
853 return (CFStringEncodingGetConverter(encoding
) ? true : false);
856 __private_extern__ CFIndex
CFStringEncodingCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
857 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
860 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUCharLength((const char *)converter
->toBytes
, flags
, bytes
, numBytes
);
862 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformCharLengthForBytes(encoding
, flags
, bytes
, numBytes
);
864 if (1 == converter
->definition
->maxBytesPerChar
) return numBytes
;
866 if (NULL
== converter
->definition
->toUnicodeLen
) {
867 CFIndex usedByteLen
= 0;
868 CFIndex totalLength
= 0;
871 while (numBytes
> 0) {
872 usedByteLen
= TO_UNICODE(converter
, flags
, bytes
, numBytes
, NULL
, 0, &usedCharLen
);
874 bytes
+= usedByteLen
;
875 numBytes
-= usedByteLen
;
876 totalLength
+= usedCharLen
;
879 if (0 == (flags
& kCFStringEncodingAllowLossyConversion
)) return 0;
881 usedByteLen
= TO_UNICODE_FALLBACK(converter
, bytes
, numBytes
, NULL
, 0, &usedCharLen
);
883 bytes
+= usedByteLen
;
884 numBytes
-= usedByteLen
;
885 totalLength
+= usedCharLen
;
891 return converter
->definition
->toUnicodeLen(flags
, bytes
, numBytes
);
898 __private_extern__ CFIndex
CFStringEncodingByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
899 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
902 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUByteLength((const char *)converter
->toBytes
, flags
, characters
, numChars
);
904 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformByteLengthForCharacters(encoding
, flags
, characters
, numChars
);
906 if (1 == converter
->definition
->maxBytesPerChar
) return numChars
;
908 if (NULL
== converter
->definition
->toBytesLen
) {
911 return ((kCFStringEncodingConversionSuccess
== CFStringEncodingUnicodeToBytes(encoding
, flags
, characters
, numChars
, &usedCharLen
, NULL
, 0, NULL
)) ? usedCharLen
: 0);
913 return converter
->definition
->toBytesLen(flags
, characters
, numChars
);
920 __private_extern__
void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding
, CFStringEncodingToBytesFallbackProc toBytes
, CFStringEncodingToUnicodeFallbackProc toUnicode
) {
921 _CFEncodingConverter
*converter
= (_CFEncodingConverter
*)__CFGetConverter(encoding
);
923 if (NULL
!= converter
) {
924 const CFStringEncodingConverter
*body
= CFStringEncodingGetConverter(encoding
);
926 converter
->toBytesFallback
= ((NULL
== toBytes
) ? ((NULL
== body
) ? __CFDefaultToBytesFallbackProc
: body
->toBytesFallback
) : toBytes
);
927 converter
->toUnicodeFallback
= ((NULL
== toUnicode
) ? ((NULL
== body
) ? __CFDefaultToUnicodeFallbackProc
: body
->toUnicodeFallback
) : toUnicode
);
931 __private_extern__
const CFStringEncodingConverter
*CFStringEncodingGetConverter(uint32_t encoding
) {
932 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
934 return ((NULL
== converter
) ? NULL
: converter
->definition
);
937 static const CFStringEncoding __CFBuiltinEncodings
[] = {
938 kCFStringEncodingMacRoman
,
939 kCFStringEncodingWindowsLatin1
,
940 kCFStringEncodingISOLatin1
,
941 kCFStringEncodingNextStepLatin
,
942 kCFStringEncodingASCII
,
943 kCFStringEncodingUTF8
,
944 /* These seven are available only in CFString-level */
945 kCFStringEncodingNonLossyASCII
,
947 kCFStringEncodingUTF16
,
948 kCFStringEncodingUTF16BE
,
949 kCFStringEncodingUTF16LE
,
951 kCFStringEncodingUTF32
,
952 kCFStringEncodingUTF32BE
,
953 kCFStringEncodingUTF32LE
,
955 kCFStringEncodingInvalidId
,
958 static CFComparisonResult
__CFStringEncodingComparator(const void *v1
, const void *v2
, void *context
) {
959 CFComparisonResult val1
= (*(const CFStringEncoding
*)v1
) & 0xFFFF;
960 CFComparisonResult val2
= (*(const CFStringEncoding
*)v2
) & 0xFFFF;
962 return ((val1
== val2
) ? ((CFComparisonResult
)(*(const CFStringEncoding
*)v1
) - (CFComparisonResult
)(*(const CFStringEncoding
*)v2
)) : val1
- val2
);
965 static void __CFStringEncodingFliterDupes(CFStringEncoding
*encodings
, CFIndex numSlots
) {
966 CFStringEncoding last
= kCFStringEncodingInvalidId
;
967 const CFStringEncoding
*limitEncodings
= encodings
+ numSlots
;
969 while (encodings
< limitEncodings
) {
970 if (last
== *encodings
) {
971 if ((encodings
+ 1) < limitEncodings
) memmove(encodings
, encodings
+ 1, sizeof(CFStringEncoding
) * (limitEncodings
- encodings
- 1));
974 last
= *(encodings
++);
979 __private_extern__
const CFStringEncoding
*CFStringEncodingListOfAvailableEncodings(void) {
980 static const CFStringEncoding
*encodings
= NULL
;
982 if (NULL
== encodings
) {
983 CFStringEncoding
*list
= (CFStringEncoding
*)__CFBuiltinEncodings
;
984 CFIndex numICUConverters
= 0, numPlatformConverters
= 0;
985 CFStringEncoding
*icuConverters
= __CFStringEncodingCreateICUEncodings(NULL
, &numICUConverters
);
986 CFStringEncoding
*platformConverters
= __CFStringEncodingCreateListOfAvailablePlatformConverters(NULL
, &numPlatformConverters
);
988 if ((NULL
!= icuConverters
) || (NULL
!= platformConverters
)) {
989 CFIndex numSlots
= (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters
+ numPlatformConverters
;
991 list
= (CFStringEncoding
*)CFAllocatorAllocate(NULL
, sizeof(CFStringEncoding
) * numSlots
, 0);
993 memcpy(list
, __CFBuiltinEncodings
, sizeof(__CFBuiltinEncodings
));
995 if (NULL
!= icuConverters
) {
996 memcpy(list
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)), icuConverters
, sizeof(CFStringEncoding
) * numICUConverters
);
997 CFAllocatorDeallocate(NULL
, icuConverters
);
1000 if (NULL
!= platformConverters
) {
1001 memcpy(list
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters
, platformConverters
, sizeof(CFStringEncoding
) * numPlatformConverters
);
1002 CFAllocatorDeallocate(NULL
, platformConverters
);
1005 CFQSortArray(list
, numSlots
, sizeof(CFStringEncoding
), (CFComparatorFunction
)__CFStringEncodingComparator
, NULL
);
1006 __CFStringEncodingFliterDupes(list
, numSlots
);
1008 if (!OSAtomicCompareAndSwapPtrBarrier(NULL
, list
, (void * volatile *)&encodings
) && (list
!= __CFBuiltinEncodings
)) CFAllocatorDeallocate(NULL
, list
);
1017 #undef kSurrogateHighStart
1018 #undef kSurrogateHighEnd
1019 #undef kSurrogateLowStart
1020 #undef kSurrogateLowEnd
1021 #undef TO_BYTE_FALLBACK
1022 #undef TO_UNICODE_FALLBACK
1024 #undef NUM_OF_ENTRIES_CYCLE