2 * Copyright (c) 2012 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFStringEncodingConverter.c
25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFArray.h>
31 #include <CoreFoundation/CFDictionary.h>
32 #include "CFICUConverters.h"
33 #include <CoreFoundation/CFUniChar.h>
34 #include <CoreFoundation/CFPriv.h>
35 #include "CFUnicodeDecomposition.h"
36 #include "CFStringEncodingConverterExt.h"
37 #include "CFStringEncodingConverterPriv.h"
39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
// Internal wrapper signature for Unicode-to-bytes conversion. Unlike the raw
// definition procs (see TO_BYTE), it receives the owning converter as first argument.
43 typedef CFIndex (*_CFToBytesProc
)(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
);
// Internal wrapper signature for bytes-to-Unicode conversion (same layout, opposite direction).
44 typedef CFIndex (*_CFToUnicodeProc
)(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
);
// Runtime converter record: the static definition plus the wrapper and fallback
// procs selected for it by __CFEncodingConverterFromDefinition.
47 const CFStringEncodingConverter
*definition
;
// NULL for the standard class (the definition proc is then called directly);
// for the ICU class this slot carries the ICU name pointer instead of a function.
48 _CFToBytesProc toBytes
;
49 _CFToUnicodeProc toUnicode
;
// Used by TO_UNICODE when one of the canonical-decomposition flags is set.
50 _CFToUnicodeProc toCanonicalUnicode
;
// Either the fallback supplied by the definition or the file-local default.
51 CFStringEncodingToBytesFallbackProc toBytesFallback
;
52 CFStringEncodingToUnicodeFallbackProc toUnicodeFallback
;
53 } _CFEncodingConverter
;
// TO_BYTE: call the converter wrapper when conv->toBytes is set; otherwise call
// definition->toBytes directly as a CFStringEncodingToBytesProc (no converter argument).
// Macro arguments are not parenthesized and conv/flags are evaluated more than once.
57 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->definition->toBytes)(flags,chars,numChars,bytes,max,used))
// TO_UNICODE: same dispatch for the opposite direction; when a wrapper exists, either
// canonical flag selects conv->toCanonicalUnicode instead of conv->toUnicode.
58 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->toUnicode ? (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->definition->toUnicode)(flags,bytes,numBytes,chars,max,used))
60 #define ASCIINewLine 0x0a
// UTF-16 surrogate ranges: high (lead) units first, then low (trail) units.
61 #define kSurrogateHighStart 0xD800
62 #define kSurrogateHighEnd 0xDBFF
63 #define kSurrogateLowStart 0xDC00
64 #define kSurrogateLowEnd 0xDFFF
// Capacity of the per-call scratch buffers used by the eight-bit and multi-byte wrappers.
66 static const uint8_t __CFMaximumConvertedLength
= 20;
68 /* Mapping 128..255 to lossy ASCII
// One entry per code point, in code point order starting at U+00A0 (NO-BREAK SPACE);
// __CFToASCIILatin1Fallback indexes it with (character - 0xA0).
// Each entry holds up to four replacement bytes, zero-padded; an all-zero entry has no replacement.
71 unsigned char chars
[4];
72 } _toLossyASCIITable
[] = {
73 {{' ', 0, 0, 0}}, // NO-BREAK SPACE
74 {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
75 {{'c', 0, 0, 0}}, // CENT SIGN
76 {{'L', 0, 0, 0}}, // POUND SIGN
77 {{'$', 0, 0, 0}}, // CURRENCY SIGN
78 {{'Y', 0, 0, 0}}, // YEN SIGN
79 {{'|', 0, 0, 0}}, // BROKEN BAR
80 {{0, 0, 0, 0}}, // SECTION SIGN
81 {{0, 0, 0, 0}}, // DIAERESIS
82 {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
83 {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
84 {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
85 {{0, 0, 0, 0}}, // NOT SIGN
86 {{'-', 0, 0, 0}}, // SOFT HYPHEN
87 {{'(', 'R', ')', 0}}, // REGISTERED SIGN
88 {{0, 0, 0, 0}}, // MACRON
89 {{0, 0, 0, 0}}, // DEGREE SIGN
90 {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
91 {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
92 {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
93 {{0, 0, 0, 0}}, // ACUTE ACCENT
94 {{0, 0, 0, 0}}, // MICRO SIGN
95 {{0, 0, 0, 0}}, // PILCROW SIGN
96 {{0, 0, 0, 0}}, // MIDDLE DOT
97 {{0, 0, 0, 0}}, // CEDILLA
98 {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
99 {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
100 {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
101 {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
102 {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
103 {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
104 {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
105 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
106 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
107 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
108 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
109 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
110 {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
111 {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
112 {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
113 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
114 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
115 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
116 {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
117 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
118 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
119 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
120 {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
121 {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
122 {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
123 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
124 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
125 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
126 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
127 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
128 {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
129 {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
130 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
131 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
132 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
133 {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
134 {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
// NOTE(review): the capital THORN entry is lower-case "th" while other capitals map to
// upper-case replacements; looks case-swapped - confirm before changing the data.
135 {{'t', 'h', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
136 {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
137 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
138 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
139 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
140 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
141 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
142 {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
143 {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
144 {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
145 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
146 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
147 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
148 {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
149 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
150 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
151 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
152 {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
// NOTE(review): the small ETH entry is upper-case "TH" while other small letters map to
// lower-case replacements; looks case-swapped - confirm before changing the data.
153 {{'T', 'H', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
154 {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
155 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
156 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
157 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
158 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
159 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
160 {{'/', 0, 0, 0}}, // DIVISION SIGN
161 {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
162 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
163 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
164 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
165 {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
166 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
167 {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
168 {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
// Copies the lossy-ASCII replacement for `character` from _toLossyASCIITable into `bytes`.
// The caller in this file only passes characters in the range 0xA0..0xFF.
171 CF_INLINE CFIndex
__CFToASCIILatin1Fallback(UniChar character
, uint8_t *bytes
, CFIndex maxByteLen
) {
// Address of the four-byte table entry for this character (table starts at 0xA0).
172 const uint8_t *losChars
= (const uint8_t*)_toLossyASCIITable
+ (character
- 0xA0) * sizeof(uint8_t[4]);
173 CFIndex numBytes
= 0;
// Copy limit: maxByteLen when it is nonzero and below 4, otherwise 4.
174 CFIndex idx
, max
= (maxByteLen
&& (maxByteLen
< 4) ? maxByteLen
: 4);
176 for (idx
= 0;idx
< max
;idx
++) {
// Store only when maxByteLen is nonzero.
178 if (maxByteLen
) bytes
[idx
] = losChars
[idx
];
// Default Unicode-to-bytes fallback, installed when the converter definition provides none.
// Classifies the first character and produces a substitute; at the common exit it stores
// one byte (when maxByteLen is nonzero), sets *usedByteLen to filledBytesLen and returns
// processCharLen, the number of UTF-16 units consumed.
188 static CFIndex
__CFDefaultToBytesFallbackProc(const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
// Defaults: one input unit consumed, one output byte produced.
189 CFIndex processCharLen
= 1, filledBytesLen
= 1;
192 if (*characters
< 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
193 byte
= (uint8_t)(*characters
- 0x80);
// 0xA0..0xFF: use the lossy-ASCII table.
194 } else if (*characters
< 0x100) {
195 *usedByteLen
= __CFToASCIILatin1Fallback(*characters
, bytes
, maxByteLen
);
// Any surrogate code unit: consume two units when this one is followed by a low surrogate.
// NOTE(review): the first-unit test is `<= kSurrogateLowStart`, which also accepts 0xDC00,
// itself a low surrogate; `< kSurrogateLowStart` looks intended - confirm.
197 } else if (*characters
>= kSurrogateHighStart
&& *characters
<= kSurrogateLowEnd
) {
198 processCharLen
= (numChars
> 1 && *characters
<= kSurrogateLowStart
&& *(characters
+ 1) >= kSurrogateLowStart
&& *(characters
+ 1) <= kSurrogateLowEnd
? 2 : 1);
// Whitespace is tested first, then the wider whitespace-and-newline set.
199 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceCharacterSet
)) {
201 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceAndNewlineCharacterSet
)) {
203 } else if (*characters
== 0x2026) { // ellipsis
204 if (0 == maxByteLen
) {
// With room for more than two bytes the ellipsis becomes three ASCII dots.
206 } else if (maxByteLen
> 2) {
207 memset(bytes
, '.', 3);
209 return processCharLen
;
// Decomposable character: look at the first code point of its decomposition.
211 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharDecomposableCharacterSet
)) {
212 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
214 (void)CFUniCharDecomposeCharacter(*characters
, decomposed
, MAX_DECOMPOSED_LENGTH
);
// An ASCII base character is used as-is.
215 if (*decomposed
< 0x80) {
216 byte
= (uint8_t)(*decomposed
);
// Otherwise the base character (narrowed to UTF16Char) is run through this fallback again.
218 UTF16Char theChar
= *decomposed
;
220 return __CFDefaultToBytesFallbackProc(&theChar
, 1, bytes
, maxByteLen
, usedByteLen
);
// Common exit for the single-byte cases.
224 if (maxByteLen
) *bytes
= byte
;
225 *usedByteLen
= filledBytesLen
;
226 return processCharLen
;
// Default bytes-to-Unicode fallback, installed when the converter definition provides none.
// Writes a question mark to the output when maxCharLen is nonzero.
229 static CFIndex
__CFDefaultToUnicodeFallbackProc(const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
230 if (maxCharLen
) *characters
= (UniChar
)'?';
// Invoke the fallback procs stored on the converter record. Unlike TO_BYTE and TO_UNICODE
// these take no flags and no converter argument.
235 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
236 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
238 #define EXTRA_BASE (0x0F00)
240 /* Wrapper funcs for non-standard converters
// Adapts a cheap eight-bit definition (one character in, one byte out per call) to the
// _CFToBytesProc signature. Stops at the first character the definition proc rejects.
// Consumed characters and produced bytes are the same count, stored in *usedByteLen and returned.
242 static CFIndex
__CFToBytesCheapEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
243 CFIndex processedCharLen
= 0;
// Work limit: maxByteLen when it is nonzero and smaller than numChars, otherwise numChars.
244 CFIndex length
= (maxByteLen
&& (maxByteLen
< numChars
) ? maxByteLen
: numChars
);
247 while (processedCharLen
< length
) {
// definition->toBytes is called as a CFStringEncodingCheapEightBitToBytesProc; a zero result ends the loop.
248 if (!((CFStringEncodingCheapEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], &byte
)) break;
// Store only when maxByteLen is nonzero.
250 if (maxByteLen
) bytes
[processedCharLen
] = byte
;
254 *usedByteLen
= processedCharLen
;
255 return processedCharLen
;
// Adapts a cheap eight-bit definition (one byte in, one character out per call) to the
// _CFToUnicodeProc signature. Stops at the first byte the definition proc rejects.
// Consumed bytes and produced characters are the same count, stored in *usedCharLen and returned.
258 static CFIndex
__CFToUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
259 CFIndex processedByteLen
= 0;
// Work limit: maxCharLen when it is nonzero and smaller than numBytes, otherwise numBytes.
260 CFIndex length
= (maxCharLen
&& (maxCharLen
< numBytes
) ? maxCharLen
: numBytes
);
263 while (processedByteLen
< length
) {
// definition->toUnicode is called as a CFStringEncodingCheapEightBitToUnicodeProc; a zero result ends the loop.
264 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
// Store only when maxCharLen is nonzero.
266 if (maxCharLen
) characters
[processedByteLen
] = character
;
270 *usedCharLen
= processedByteLen
;
271 return processedByteLen
;
// Canonical-decomposition variant of the cheap eight-bit bytes-to-Unicode wrapper.
// Each byte is converted to one character; decomposable characters are expanded with
// CFUniCharDecomposeCharacter and emitted as UTF-16. Returns the number of bytes consumed
// and reports the produced UTF-16 unit count through *usedCharLen.
274 static CFIndex
__CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
275 CFIndex processedByteLen
= 0;
276 CFIndex theUsedCharLen
= 0;
277 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
// Passed to CFUniCharIsDecomposableCharacter to select the HFS+ decomposition rules.
280 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
// A maxCharLen of zero disables the output-capacity limit in the loop condition.
282 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
283 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
285 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
288 usedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far; the early returns below leave it at this value.
289 *usedCharLen
= theUsedCharLen
;
291 for (idx
= 0;idx
< usedLen
;idx
++) {
292 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// A supplementary code point needs two UTF-16 units.
293 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Split into a surrogate pair: high unit from the top 10 bits, low unit from the bottom 10 bits.
296 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
297 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
298 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
301 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
303 *(characters
++) = charBuffer
[idx
];
// Non-decomposable character: store it as-is when maxCharLen is nonzero.
307 if (maxCharLen
) *(characters
++) = character
;
313 *usedCharLen
= theUsedCharLen
;
314 return processedByteLen
;
// Adapts a standard eight-bit definition to the _CFToBytesProc signature. The definition
// proc receives the remaining characters, yields one byte and returns how many characters
// it consumed. Returns the total number of characters consumed.
317 static CFIndex
__CFToBytesStandardEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
318 CFIndex processedCharLen
= 0;
// A maxByteLen of zero disables the output-capacity limit.
324 while (numChars
&& (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
// A zero return from the definition proc ends the loop.
325 if (!(usedLen
= ((CFStringEncodingStandardEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
, numChars
, &byte
))) break;
327 if (maxByteLen
) bytes
[*usedByteLen
] = byte
;
// Advance past the characters consumed for this byte.
329 characters
+= usedLen
;
331 processedCharLen
+= usedLen
;
334 return processedCharLen
;
// Adapts a standard eight-bit definition to the _CFToUnicodeProc signature. Each input
// byte may expand to several characters, staged in charBuffer before being copied out.
// Returns the number of bytes consumed; *usedCharLen accumulates the characters produced.
337 static CFIndex
__CFToUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
338 CFIndex processedByteLen
= 0;
339 UniChar charBuffer
[__CFMaximumConvertedLength
];
344 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
// NOTE(review): the proc is cast to the CheapEightBit to-Unicode type although it is
// handed a buffer and returns a length; a StandardEightBit proc type looks intended - confirm.
345 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
// Stop when the staged characters would not fit in the remaining output space.
350 if (*usedCharLen
+ usedLen
> maxCharLen
) break;
352 for (idx
= 0;idx
< usedLen
;idx
++) {
353 characters
[*usedCharLen
+ idx
] = charBuffer
[idx
];
356 *usedCharLen
+= usedLen
;
360 return processedByteLen
;
// Canonical-decomposition variant of the standard eight-bit bytes-to-Unicode wrapper.
// Each byte is converted into charBuffer; every decomposable character in it is expanded
// into decompBuffer. Returns the number of bytes consumed and reports the produced
// UTF-16 unit count through *usedCharLen.
363 static CFIndex
__CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
364 CFIndex processedByteLen
= 0;
365 UniChar charBuffer
[__CFMaximumConvertedLength
];
366 UTF32Char decompBuffer
[MAX_DECOMPOSED_LENGTH
];
369 CFIndex idx
, decompIndex
;
370 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
371 CFIndex theUsedCharLen
= 0;
373 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// NOTE(review): same CheapEightBit cast as in the non-canonical standard wrapper - confirm the proc type.
374 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
376 for (idx
= 0;idx
< usedLen
;idx
++) {
377 if (CFUniCharIsDecomposableCharacter(charBuffer
[idx
], isHFSPlus
)) {
378 decompedLen
= CFUniCharDecomposeCharacter(charBuffer
[idx
], decompBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far; the early returns below leave it at this value.
379 *usedCharLen
= theUsedCharLen
;
// NOTE(review): this loop tests decompBuffer[decompIndex] but the statements below read,
// modify and emit charBuffer[idx] (the undecomposed 16-bit character), so the decomposed
// code points never reach the output. decompBuffer[decompIndex] looks intended, as in the
// cheap eight-bit and multi-byte canonical wrappers - confirm and fix.
381 for (decompIndex
= 0;decompIndex
< decompedLen
;decompIndex
++) {
382 if (decompBuffer
[decompIndex
] > 0xFFFF) { // Non-BMP
383 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
386 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
387 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
388 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
391 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
393 *(characters
++) = charBuffer
[idx
];
// Non-decomposable character: store it as-is when maxCharLen is nonzero.
397 if (maxCharLen
) *(characters
++) = charBuffer
[idx
];
404 *usedCharLen
= theUsedCharLen
;
405 return processedByteLen
;
// Adapts a cheap multi-byte definition (one character in, several bytes out per call) to
// the _CFToBytesProc signature. Bytes for each character are staged in byteBuffer.
// Returns the number of characters consumed; *usedByteLen accumulates the bytes produced.
408 static CFIndex
__CFToBytesCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
409 CFIndex processedCharLen
= 0;
410 uint8_t byteBuffer
[__CFMaximumConvertedLength
];
415 while ((processedCharLen
< numChars
) && (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
// usedLen is the byte count produced for this character; zero ends the loop.
416 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], byteBuffer
))) break;
// Stop when the staged bytes would not fit in the remaining output space.
421 if (*usedByteLen
+ usedLen
> maxByteLen
) break;
423 for (idx
= 0;idx
<usedLen
;idx
++) {
424 bytes
[*usedByteLen
+ idx
] = byteBuffer
[idx
];
428 *usedByteLen
+= usedLen
;
432 return processedCharLen
;
// Adapts a cheap multi-byte definition (several bytes in, one character out per call) to
// the _CFToUnicodeProc signature. Returns the number of bytes consumed.
435 static CFIndex
__CFToUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
436 CFIndex processedByteLen
= 0;
// A maxCharLen of zero disables the output-capacity limit.
442 while (numBytes
&& (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
// usedLen is the number of bytes the definition proc consumed; zero ends the loop.
443 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
445 if (maxCharLen
) *(characters
++) = character
;
447 processedByteLen
+= usedLen
;
452 return processedByteLen
;
// Canonical-decomposition variant of the cheap multi-byte bytes-to-Unicode wrapper.
// Each converted character is decomposed when decomposable and emitted as UTF-16.
// Returns the number of bytes consumed and reports the produced UTF-16 unit count
// through *usedCharLen.
455 static CFIndex
__CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
456 CFIndex processedByteLen
= 0;
457 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
460 CFIndex decomposedLen
;
461 CFIndex theUsedCharLen
= 0;
462 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
464 while (numBytes
&& (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// usedLen is the number of bytes the definition proc consumed; zero ends the loop.
465 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
467 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
470 decomposedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
// Publish the count reached so far; the early returns below leave it at this value.
471 *usedCharLen
= theUsedCharLen
;
473 for (idx
= 0;idx
< decomposedLen
;idx
++) {
474 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
// A supplementary code point needs two UTF-16 units.
475 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
// Split into a surrogate pair: high unit from the top 10 bits, low unit from the bottom 10 bits.
478 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
479 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
480 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
483 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
485 *(characters
++) = charBuffer
[idx
];
// Non-decomposable character: store it as-is when maxCharLen is nonzero.
489 if (maxCharLen
) *(characters
++) = character
;
493 processedByteLen
+= usedLen
;
497 *usedCharLen
= theUsedCharLen
;
498 return processedByteLen
;
// Builds the runtime _CFEncodingConverter record for a definition: takes a slot from a
// statically held array, zero-fills it, records the definition, selects the wrapper
// procs matching the definition's encodingClass and installs the fallback procs.
// The static bookkeeping below is not protected inside this function; the visible caller
// is __CFGetConverter.
503 CF_INLINE _CFEncodingConverter
*__CFEncodingConverterFromDefinition(const CFStringEncodingConverter
*definition
, CFStringEncoding encoding
) {
// Slots are allocated NUM_OF_ENTRIES_CYCLE at a time.
504 #define NUM_OF_ENTRIES_CYCLE (10)
505 static uint32_t _currentIndex
= 0;
506 static uint32_t _allocatedSize
= 0;
507 static _CFEncodingConverter
*_allocatedEntries
= NULL
;
508 _CFEncodingConverter
*converter
;
// Current chunk exhausted (or nothing allocated yet): forget it so a new one is allocated.
// NOTE(review): no release of the previous chunk is visible; presumably deliberate because
// handed-out records stay referenced by the converter caches - confirm.
511 if ((_currentIndex
+ 1) >= _allocatedSize
) {
514 _allocatedEntries
= NULL
;
516 if (_allocatedEntries
== NULL
) { // Not allocated yet
517 _allocatedEntries
= (_CFEncodingConverter
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(_CFEncodingConverter
) * NUM_OF_ENTRIES_CYCLE
, 0);
518 _allocatedSize
= NUM_OF_ENTRIES_CYCLE
;
519 converter
= &(_allocatedEntries
[_currentIndex
]);
// Otherwise take the next free slot of the current chunk.
521 converter
= &(_allocatedEntries
[++_currentIndex
]);
524 memset(converter
, 0, sizeof(_CFEncodingConverter
));
526 converter
->definition
= definition
;
528 switch (definition
->encodingClass
) {
// Standard class: no wrappers; TO_BYTE and TO_UNICODE call the definition procs directly.
529 case kCFStringEncodingConverterStandard
:
530 converter
->toBytes
= NULL
;
531 converter
->toUnicode
= NULL
;
532 converter
->toCanonicalUnicode
= NULL
;
535 case kCFStringEncodingConverterCheapEightBit
:
536 converter
->toBytes
= __CFToBytesCheapEightBitWrapper
;
537 converter
->toUnicode
= __CFToUnicodeCheapEightBitWrapper
;
538 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapEightBitWrapper
;
541 case kCFStringEncodingConverterStandardEightBit
:
542 converter
->toBytes
= __CFToBytesStandardEightBitWrapper
;
543 converter
->toUnicode
= __CFToUnicodeStandardEightBitWrapper
;
544 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeStandardEightBitWrapper
;
547 case kCFStringEncodingConverterCheapMultiByte
:
548 converter
->toBytes
= __CFToBytesCheapMultiByteWrapper
;
549 converter
->toUnicode
= __CFToUnicodeCheapMultiByteWrapper
;
550 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapMultiByteWrapper
;
// ICU class: the toBytes slot stores the ICU converter name pointer, not a function;
// the public entry points cast it back to const char * before calling the ICU helpers.
553 case kCFStringEncodingConverterICU
:
554 converter
->toBytes
= (_CFToBytesProc
)__CFStringEncodingGetICUName(encoding
);
557 case kCFStringEncodingConverterPlatformSpecific
:
560 default: // Shouldn't be here
// Fallbacks: prefer the ones supplied by the definition, else the file-local defaults.
564 converter
->toBytesFallback
= (definition
->toBytesFallback
? definition
->toBytesFallback
: __CFDefaultToBytesFallbackProc
);
565 converter
->toUnicodeFallback
= (definition
->toUnicodeFallback
? definition
->toUnicodeFallback
: __CFDefaultToUnicodeFallbackProc
);
// Maps an encoding to its static converter definition. The built-in encodings listed
// below resolve to file-scope definitions; every other encoding is looked up through
// __CFStringEncodingGetExternalConverter.
570 CF_INLINE
const CFStringEncodingConverter
*__CFStringEncodingConverterGetDefinition(CFStringEncoding encoding
) {
572 case kCFStringEncodingUTF8
:
573 return &__CFConverterUTF8
;
575 case kCFStringEncodingMacRoman
:
576 return &__CFConverterMacRoman
;
578 case kCFStringEncodingWindowsLatin1
:
579 return &__CFConverterWinLatin1
;
581 case kCFStringEncodingASCII
:
582 return &__CFConverterASCII
;
584 case kCFStringEncodingISOLatin1
:
585 return &__CFConverterISOLatin1
;
588 case kCFStringEncodingNextStepLatin
:
589 return &__CFConverterNextStepLatin
;
// Not a built-in encoding.
593 return __CFStringEncodingGetExternalConverter(encoding
);
// Returns the cached runtime converter for an encoding, creating and caching it on first
// use. Three fixed slots serve the most common encodings; all others are kept in a
// dictionary keyed by the encoding value.
597 static const _CFEncodingConverter
*__CFGetConverter(uint32_t encoding
) {
598 const _CFEncodingConverter
*converter
= NULL
;
// Points at the fixed slot for this encoding, or stays NULL when the dictionary is used.
599 const _CFEncodingConverter
**commonConverterSlot
= NULL
;
600 static _CFEncodingConverter
*commonConverters
[3] = {NULL
, NULL
, NULL
}; // UTF8, MacRoman/WinLatin1, and the default encoding*
// Created on first insertion, with NULL key and value callbacks.
601 static CFMutableDictionaryRef mappingTable
= NULL
;
602 static CFSpinLock_t lock
= CFSpinLockInit
;
605 case kCFStringEncodingUTF8
: commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[0]); break;
607 /* the switch here should avoid possible bootstrap issues in the default: case below when invoked from CFStringGetSystemEncoding() */
608 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
609 case kCFStringEncodingMacRoman
: commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[1]); break;
610 #elif DEPLOYMENT_TARGET_WINDOWS
611 case kCFStringEncodingWindowsLatin1
: commonConverterSlot
= (const _CFEncodingConverter
**)(&(commonConverters
[1])); break;
613 #warning This case must match __defaultEncoding value defined in CFString.c
614 case kCFStringEncodingISOLatin1
: commonConverterSlot
= (const _CFEncodingConverter
**)(&(commonConverters
[1])); break;
615 #endif /* DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED */
// Any other encoding uses slot 2 when it equals the system encoding.
617 default: if (CFStringGetSystemEncoding() == encoding
) commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[2]); break;
// First cache lookup: fixed slot if one applies, otherwise the dictionary (if it exists).
621 converter
= ((NULL
== commonConverterSlot
) ? ((NULL
== mappingTable
) ? NULL
: (const _CFEncodingConverter
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
);
622 __CFSpinUnlock(&lock
);
// Cache miss: resolve the static definition.
624 if (NULL
== converter
) {
625 const CFStringEncodingConverter
*definition
= __CFStringEncodingConverterGetDefinition(encoding
);
627 if (NULL
!= definition
) {
// Second cache lookup before creating a record; presumably another thread may have
// filled the cache while the definition was being resolved - TODO confirm.
629 converter
= ((NULL
== commonConverterSlot
) ? ((NULL
== mappingTable
) ? NULL
: (const _CFEncodingConverter
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
);
631 if (NULL
== converter
) {
632 converter
= __CFEncodingConverterFromDefinition(definition
, encoding
);
// Cache the new record: in the dictionary when no fixed slot applies, else in the slot.
634 if (NULL
== commonConverterSlot
) {
635 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, NULL
);
637 CFDictionarySetValue(mappingTable
, (const void *)(uintptr_t)encoding
, converter
);
639 *commonConverterSlot
= converter
;
642 __CFSpinUnlock(&lock
);
// Converts UTF-16 characters to bytes in the given encoding.
// usedCharLen and usedByteLen are optional outputs (written only when non-NULL) receiving
// the characters consumed and the bytes produced. A maxByteLen of zero is passed down to
// the converters with a zero capacity and disables the output limit of the main loop.
// Result codes visible here: kCFStringEncodingConversionSuccess,
// kCFStringEncodingInvalidInputStream, kCFStringEncodingInsufficientOutputBufferLength
// and kCFStringEncodingConverterUnavailable.
651 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
// UTF-8 is handled inline without going through __CFGetConverter.
652 if (encoding
== kCFStringEncodingUTF8
) {
// Cached to-bytes proc of the UTF-8 converter.
653 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
654 CFIndex convertedCharLen
;
// With a canonical flag, decompose straight into UTF-8 output.
658 if ((flags
& kCFStringEncodingUseCanonical
) || (flags
& kCFStringEncodingUseHFSPlusCanonical
)) {
659 (void)CFUniCharDecompose(characters
, numChars
, &convertedCharLen
, (void *)bytes
, maxByteLen
, &usedLen
, true, kCFUniCharUTF8Format
, (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false));
662 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
663 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
665 convertedCharLen
= __CFToUTF8(0, characters
, numChars
, bytes
, maxByteLen
, &usedLen
);
667 if (usedCharLen
) *usedCharLen
= convertedCharLen
;
668 if (usedByteLen
) *usedByteLen
= usedLen
;
// Everything converted: success. Otherwise decide between a full buffer and bad input.
670 if (convertedCharLen
== numChars
) {
671 return kCFStringEncodingConversionSuccess
;
672 } else if ((maxByteLen
> 0) && ((maxByteLen
- usedLen
) < 10)) { // could be filled outbuf
673 UTF16Char character
= characters
[convertedCharLen
];
// The first unconverted unit is invalid input when it is a lone low surrogate, or a high
// surrogate that is last in the input or not followed by a low surrogate.
675 if (((character
>= kSurrogateLowStart
) && (character
<= kSurrogateLowEnd
)) || ((character
>= kSurrogateHighStart
) && (character
<= kSurrogateHighEnd
) && ((1 == (numChars
- convertedCharLen
)) || (characters
[convertedCharLen
+ 1] < kSurrogateLowStart
) || (characters
[convertedCharLen
+ 1] > kSurrogateLowEnd
)))) return kCFStringEncodingInvalidInputStream
;
677 return kCFStringEncodingInsufficientOutputBufferLength
;
679 return kCFStringEncodingInvalidInputStream
;
// General path: every other encoding goes through its cached converter record.
682 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
// localUsedByteLen: bytes reported by the most recent converter call.
// theUsedByteLen: running total of bytes accepted into the output.
684 CFIndex localUsedByteLen
;
685 CFIndex theUsedByteLen
= 0;
686 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
687 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose
= NULL
;
688 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar
= NULL
;
690 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// Pick up the combining-character callbacks from the definition according to the flags.
692 if (flags
& kCFStringEncodingSubstituteCombinings
) {
693 if (!(flags
& kCFStringEncodingAllowLossyConversion
)) isValidCombiningChar
= converter
->definition
->isValidCombiningChar
;
695 isValidCombiningChar
= converter
->definition
->isValidCombiningChar
;
696 if (!(flags
& kCFStringEncodingIgnoreCombinings
)) {
697 toBytesPrecompose
= converter
->definition
->toBytesPrecompose
;
698 flags
|= kCFStringEncodingComposeCombinings
;
// ICU converter: the toBytes slot holds the ICU converter name.
702 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToBytes((const char *)converter
->toBytes
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
);
704 /* Platform converter */
705 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformUnicodeToBytes(encoding
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
);
// Main loop: usedLen counts characters consumed so far.
707 while ((usedLen
< numChars
) && (!maxByteLen
|| (theUsedByteLen
< maxByteLen
))) {
// Convert as much as possible; entering the body means the converter stopped early.
708 if ((usedLen
+= TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) < numChars
) {
// Stopped on a combining character that follows at least one converted character.
711 if (isValidCombiningChar
&& (usedLen
> 0) && isValidCombiningChar(characters
[usedLen
])) {
712 if (toBytesPrecompose
) {
713 CFIndex localUsedLen
= usedLen
;
// Step back over preceding combining characters to the character before them.
715 while (isValidCombiningChar(characters
[--usedLen
]));
716 theUsedByteLen
+= localUsedByteLen
;
// For multi-byte encodings, measure the bytes already emitted for the backed-up
// characters (NULL output, zero capacity) and subtract them from the total.
717 if (converter
->definition
->maxBytesPerChar
> 1) {
718 TO_BYTE(converter
, flags
, characters
+ usedLen
, localUsedLen
- usedLen
, NULL
, 0, &localUsedByteLen
);
719 theUsedByteLen
-= localUsedByteLen
;
// Try to convert the base character plus its combining marks as one precomposed unit.
723 if ((localUsedLen
= toBytesPrecompose(flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) > 0) {
724 usedLen
+= localUsedLen
;
725 if ((usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
])) { // There is a non-base char not combined remaining
726 theUsedByteLen
+= localUsedByteLen
;
727 theResult
= kCFStringEncodingInvalidInputStream
;
// Precomposition failed but lossy conversion is allowed: substitute.
730 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
731 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
// Skip over the following combining characters and emit a single lossy byte.
734 while (isValidCombiningChar(characters
[++usedLen
]));
735 localUsedByteLen
= 1;
736 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
739 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
742 theResult
= kCFStringEncodingInvalidInputStream
;
// Output exactly full, or the same input converts with unlimited output space:
// the stop was caused by lack of room.
745 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
746 theUsedByteLen
+= localUsedByteLen
;
747 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Ignoring combinings: skip the run of combining characters.
749 } else if (flags
& kCFStringEncodingIgnoreCombinings
) {
750 while ((++usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
]));
752 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
754 theUsedByteLen
+= localUsedByteLen
;
757 localUsedByteLen
= 1;
758 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
760 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
// Stop not caused by a combining character: same full-buffer test as above.
763 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
764 theUsedByteLen
+= localUsedByteLen
;
// Lossy conversion without an explicit lossy byte: consume trailing characters whose
// fallback produces no bytes, measured with NULL output.
766 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
767 CFIndex localUsedLen
;
769 localUsedByteLen
= 0;
770 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
772 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Unconvertible character with lossy conversion allowed: substitute.
774 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
775 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
777 theUsedByteLen
+= localUsedByteLen
;
780 localUsedByteLen
= 1;
781 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
783 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
786 theUsedByteLen
+= localUsedByteLen
;
787 theResult
= kCFStringEncodingInvalidInputStream
;
791 theUsedByteLen
+= localUsedByteLen
;
// Loop ended with input left and a bounded buffer while still marked successful.
794 if (usedLen
< numChars
&& maxByteLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
795 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
796 CFIndex localUsedLen
;
798 localUsedByteLen
= 0;
799 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
801 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Report totals through the optional output parameters.
803 if (usedByteLen
) *usedByteLen
= theUsedByteLen
;
804 if (usedCharLen
) *usedCharLen
= usedLen
;
// Converts bytes in the given encoding to UTF-16 characters.
// usedByteLen and usedCharLen are optional outputs (written only when non-NULL) receiving
// the bytes consumed and the characters produced. A maxCharLen of zero is passed down to
// the converters with a zero capacity and disables the output limit of the main loop.
810 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
811 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
// theUsedCharLen: running total of characters produced.
// localUsedCharLen: characters reported by the most recent converter call.
813 CFIndex theUsedCharLen
= 0;
814 CFIndex localUsedCharLen
;
815 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
817 if (!converter
) return kCFStringEncodingConverterUnavailable
;
// ICU converter: the toBytes slot holds the ICU converter name.
819 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToUnicode((const char *)converter
->toBytes
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
);
821 /* Platform converter */
822 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformBytesToUnicode(encoding
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
);
// Main loop: usedLen counts bytes consumed so far.
824 while ((usedLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
// Convert as much as possible; entering the body means the converter stopped early.
825 if ((usedLen
+= TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
)) < numBytes
) {
826 CFIndex tempUsedCharLen
;
// Treat the stop as a full buffer when the output is exactly full, or when a canonical
// flag is set or only one slot is left and the same input converts with unlimited space.
828 if (maxCharLen
&& ((maxCharLen
== theUsedCharLen
+ localUsedCharLen
) || (((flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
)) || (maxCharLen
== theUsedCharLen
+ localUsedCharLen
+ 1)) && TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, NULL
, 0, &tempUsedCharLen
)))) { // buffer was filled up
829 theUsedCharLen
+= localUsedCharLen
;
830 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Unconvertible bytes with lossy conversion allowed: let the fallback substitute.
832 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
833 theUsedCharLen
+= localUsedCharLen
;
834 usedLen
+= TO_UNICODE_FALLBACK(converter
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
);
836 theUsedCharLen
+= localUsedCharLen
;
837 theResult
= kCFStringEncodingInvalidInputStream
;
841 theUsedCharLen
+= localUsedCharLen
;
// Loop ended with input left and a bounded buffer while still marked successful.
844 if (usedLen
< numBytes
&& maxCharLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
845 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
// Report totals through the optional output parameters.
847 if (usedCharLen
) *usedCharLen
= theUsedCharLen
;
848 if (usedByteLen
) *usedByteLen
= usedLen
;
// Returns true when CFStringEncodingGetConverter yields a non-NULL converter for the encoding.
853 __private_extern__
bool CFStringEncodingIsValidEncoding(uint32_t encoding
) {
854 return (CFStringEncodingGetConverter(encoding
) ? true : false);
857 __private_extern__ CFIndex
CFStringEncodingCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
858 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
861 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUCharLength((const char *)converter
->toBytes
, flags
, bytes
, numBytes
);
863 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformCharLengthForBytes(encoding
, flags
, bytes
, numBytes
);
865 if (1 == converter
->definition
->maxBytesPerChar
) return numBytes
;
867 if (NULL
== converter
->definition
->toUnicodeLen
) {
868 CFIndex usedByteLen
= 0;
869 CFIndex totalLength
= 0;
872 while (numBytes
> 0) {
873 usedByteLen
= TO_UNICODE(converter
, flags
, bytes
, numBytes
, NULL
, 0, &usedCharLen
);
875 bytes
+= usedByteLen
;
876 numBytes
-= usedByteLen
;
877 totalLength
+= usedCharLen
;
880 if (0 == (flags
& kCFStringEncodingAllowLossyConversion
)) return 0;
882 usedByteLen
= TO_UNICODE_FALLBACK(converter
, bytes
, numBytes
, NULL
, 0, &usedCharLen
);
884 bytes
+= usedByteLen
;
885 numBytes
-= usedByteLen
;
886 totalLength
+= usedCharLen
;
892 return converter
->definition
->toUnicodeLen(flags
, bytes
, numBytes
);
899 __private_extern__ CFIndex
CFStringEncodingByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
900 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
903 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUByteLength((const char *)converter
->toBytes
, flags
, characters
, numChars
);
905 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformByteLengthForCharacters(encoding
, flags
, characters
, numChars
);
907 if (1 == converter
->definition
->maxBytesPerChar
) return numChars
;
909 if (NULL
== converter
->definition
->toBytesLen
) {
912 return ((kCFStringEncodingConversionSuccess
== CFStringEncodingUnicodeToBytes(encoding
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
)) ? usedByteLen
: 0);
914 return converter
->definition
->toBytesLen(flags
, characters
, numChars
);
921 __private_extern__
void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding
, CFStringEncodingToBytesFallbackProc toBytes
, CFStringEncodingToUnicodeFallbackProc toUnicode
) {
922 _CFEncodingConverter
*converter
= (_CFEncodingConverter
*)__CFGetConverter(encoding
);
924 if (NULL
!= converter
) {
925 const CFStringEncodingConverter
*body
= CFStringEncodingGetConverter(encoding
);
927 converter
->toBytesFallback
= ((NULL
== toBytes
) ? ((NULL
== body
) ? __CFDefaultToBytesFallbackProc
: body
->toBytesFallback
) : toBytes
);
928 converter
->toUnicodeFallback
= ((NULL
== toUnicode
) ? ((NULL
== body
) ? __CFDefaultToUnicodeFallbackProc
: body
->toUnicodeFallback
) : toUnicode
);
932 __private_extern__
const CFStringEncodingConverter
*CFStringEncodingGetConverter(uint32_t encoding
) {
933 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
935 return ((NULL
== converter
) ? NULL
: converter
->definition
);
938 static const CFStringEncoding __CFBuiltinEncodings
[] = {
939 kCFStringEncodingMacRoman
,
940 kCFStringEncodingWindowsLatin1
,
941 kCFStringEncodingISOLatin1
,
942 kCFStringEncodingNextStepLatin
,
943 kCFStringEncodingASCII
,
944 kCFStringEncodingUTF8
,
945 /* These seven are available only in CFString-level */
946 kCFStringEncodingNonLossyASCII
,
948 kCFStringEncodingUTF16
,
949 kCFStringEncodingUTF16BE
,
950 kCFStringEncodingUTF16LE
,
952 kCFStringEncodingUTF32
,
953 kCFStringEncodingUTF32BE
,
954 kCFStringEncodingUTF32LE
,
956 kCFStringEncodingInvalidId
,
959 static CFComparisonResult
__CFStringEncodingComparator(const void *v1
, const void *v2
, void *context
) {
960 CFComparisonResult val1
= (*(const CFStringEncoding
*)v1
) & 0xFFFF;
961 CFComparisonResult val2
= (*(const CFStringEncoding
*)v2
) & 0xFFFF;
963 return ((val1
== val2
) ? ((CFComparisonResult
)(*(const CFStringEncoding
*)v1
) - (CFComparisonResult
)(*(const CFStringEncoding
*)v2
)) : val1
- val2
);
966 static void __CFStringEncodingFliterDupes(CFStringEncoding
*encodings
, CFIndex numSlots
) {
967 CFStringEncoding last
= kCFStringEncodingInvalidId
;
968 const CFStringEncoding
*limitEncodings
= encodings
+ numSlots
;
970 while (encodings
< limitEncodings
) {
971 if (last
== *encodings
) {
972 if ((encodings
+ 1) < limitEncodings
) memmove(encodings
, encodings
+ 1, sizeof(CFStringEncoding
) * (limitEncodings
- encodings
- 1));
975 last
= *(encodings
++);
980 __private_extern__
const CFStringEncoding
*CFStringEncodingListOfAvailableEncodings(void) {
981 static const CFStringEncoding
*encodings
= NULL
;
983 if (NULL
== encodings
) {
984 CFStringEncoding
*list
= (CFStringEncoding
*)__CFBuiltinEncodings
;
985 CFIndex numICUConverters
= 0, numPlatformConverters
= 0;
986 CFStringEncoding
*icuConverters
= __CFStringEncodingCreateICUEncodings(NULL
, &numICUConverters
);
987 CFStringEncoding
*platformConverters
= __CFStringEncodingCreateListOfAvailablePlatformConverters(NULL
, &numPlatformConverters
);
989 if ((NULL
!= icuConverters
) || (NULL
!= platformConverters
)) {
990 CFIndex numSlots
= (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters
+ numPlatformConverters
;
992 list
= (CFStringEncoding
*)CFAllocatorAllocate(NULL
, sizeof(CFStringEncoding
) * numSlots
, 0);
994 memcpy(list
, __CFBuiltinEncodings
, sizeof(__CFBuiltinEncodings
));
996 if (NULL
!= icuConverters
) {
997 memcpy(list
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)), icuConverters
, sizeof(CFStringEncoding
) * numICUConverters
);
998 CFAllocatorDeallocate(NULL
, icuConverters
);
1001 if (NULL
!= platformConverters
) {
1002 memcpy(list
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters
, platformConverters
, sizeof(CFStringEncoding
) * numPlatformConverters
);
1003 CFAllocatorDeallocate(NULL
, platformConverters
);
1006 CFQSortArray(list
, numSlots
, sizeof(CFStringEncoding
), (CFComparatorFunction
)__CFStringEncodingComparator
, NULL
);
1007 __CFStringEncodingFliterDupes(list
, numSlots
);
1009 if (!OSAtomicCompareAndSwapPtrBarrier(NULL
, list
, (void * volatile *)&encodings
) && (list
!= __CFBuiltinEncodings
)) CFAllocatorDeallocate(NULL
, list
);
/* Retire the file-local helper macros so they are no longer defined past this point. */
#undef kSurrogateHighStart
#undef kSurrogateHighEnd
#undef kSurrogateLowStart
#undef kSurrogateLowEnd

#undef TO_BYTE_FALLBACK
#undef TO_UNICODE_FALLBACK

#undef NUM_OF_ENTRIES_CYCLE