2 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFStringEncodingConverter.c
25 Copyright (c) 1998-2013, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFArray.h>
31 #include <CoreFoundation/CFDictionary.h>
32 #include "CFICUConverters.h"
33 #include <CoreFoundation/CFUniChar.h>
34 #include <CoreFoundation/CFPriv.h>
35 #include "CFUnicodeDecomposition.h"
36 #include "CFStringEncodingConverterExt.h"
37 #include "CFStringEncodingConverterPriv.h"
40 typedef CFIndex (*_CFToBytesProc
)(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
);
41 typedef CFIndex (*_CFToUnicodeProc
)(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
);
44 const CFStringEncodingConverter
*definition
;
45 _CFToBytesProc toBytes
;
46 _CFToUnicodeProc toUnicode
;
47 _CFToUnicodeProc toCanonicalUnicode
;
48 CFStringEncodingToBytesFallbackProc toBytesFallback
;
49 CFStringEncodingToUnicodeFallbackProc toUnicodeFallback
;
50 } _CFEncodingConverter
;
/* Dispatch helpers.
 *
 * TO_BYTE / TO_UNICODE call the wrapper installed on the converter record when
 * there is one, and otherwise call the definition's own routine directly
 * (which takes no converter argument).  TO_UNICODE picks the canonical wrapper
 * when either canonical flag is set.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. `a | b` for flags, or a conditional expression for conv) expands with
 * the intended precedence.  Note that `conv` and `flags` are evaluated more
 * than once; do not pass expressions with side effects for them. */
#define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) \
    ((conv)->toBytes \
        ? (conv)->toBytes((conv), (flags), (chars), (numChars), (bytes), (max), (used)) \
        : ((CFStringEncodingToBytesProc)(conv)->definition->toBytes)((flags), (chars), (numChars), (bytes), (max), (used)))
#define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) \
    ((conv)->toUnicode \
        ? (((flags) & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) \
            ? (conv)->toCanonicalUnicode((conv), (flags), (bytes), (numBytes), (chars), (max), (used)) \
            : (conv)->toUnicode((conv), (flags), (bytes), (numBytes), (chars), (max), (used))) \
        : ((CFStringEncodingToUnicodeProc)(conv)->definition->toUnicode)((flags), (bytes), (numBytes), (chars), (max), (used)))
/* Byte emitted by the default fallback for newline-class characters. */
#define ASCIINewLine        0x0A

/* UTF-16 surrogate code unit ranges. */
#define kSurrogateHighStart 0xD800
#define kSurrogateHighEnd   0xDBFF
#define kSurrogateLowStart  0xDC00
#define kSurrogateLowEnd    0xDFFF

/* Size of the scratch buffers the wrappers hand to single-unit converters. */
static const uint8_t __CFMaximumConvertedLength = 20;
/* Mapping U+00A0..U+00FF (160..255) to lossy ASCII.
 *
 * Entry i describes character 0xA0 + i.  Each entry holds up to three ASCII
 * bytes, zero padded to exactly four bytes; an all-zero entry means "no
 * replacement".  __CFToASCIILatin1Fallback indexes this table as a flat byte
 * array with a 4-byte stride, so the entry size must not change.
 */
static const struct {
    unsigned char chars[4];
} _toLossyASCIITable[] = {
    {{' ', 0, 0, 0}}, // NO-BREAK SPACE
    {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
    {{'c', 0, 0, 0}}, // CENT SIGN
    {{'L', 0, 0, 0}}, // POUND SIGN
    {{'$', 0, 0, 0}}, // CURRENCY SIGN
    {{'Y', 0, 0, 0}}, // YEN SIGN
    {{'|', 0, 0, 0}}, // BROKEN BAR
    {{0, 0, 0, 0}}, // SECTION SIGN
    {{0, 0, 0, 0}}, // DIAERESIS
    {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
    {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
    {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{0, 0, 0, 0}}, // NOT SIGN
    {{'-', 0, 0, 0}}, // SOFT HYPHEN
    {{'(', 'R', ')', 0}}, // REGISTERED SIGN
    {{0, 0, 0, 0}}, // MACRON
    {{0, 0, 0, 0}}, // DEGREE SIGN
    {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
    {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
    {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
    {{0, 0, 0, 0}}, // ACUTE ACCENT
    {{0, 0, 0, 0}}, // MICRO SIGN
    {{0, 0, 0, 0}}, // PILCROW SIGN
    {{0, 0, 0, 0}}, // MIDDLE DOT
    {{0, 0, 0, 0}}, // CEDILLA
    {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
    {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
    {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
    {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
    {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
    {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
    {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
    {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
    {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
    {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
    {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic) -- uppercase, matching the capital letter
    {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
    {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
    {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic) -- lowercase, matching the small letter
    {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
    {{'/', 0, 0, 0}}, // DIVISION SIGN
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
};
168 CF_INLINE CFIndex
__CFToASCIILatin1Fallback(UniChar character
, uint8_t *bytes
, CFIndex maxByteLen
) {
169 const uint8_t *losChars
= (const uint8_t*)_toLossyASCIITable
+ (character
- 0xA0) * sizeof(uint8_t[4]);
170 CFIndex numBytes
= 0;
171 CFIndex idx
, max
= (maxByteLen
&& (maxByteLen
< 4) ? maxByteLen
: 4);
173 for (idx
= 0;idx
< max
;idx
++) {
175 if (maxByteLen
) bytes
[idx
] = losChars
[idx
];
185 static CFIndex
__CFDefaultToBytesFallbackProc(const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
186 CFIndex processCharLen
= 1, filledBytesLen
= 1;
189 if (*characters
< 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
190 byte
= (uint8_t)(*characters
- 0x80);
191 } else if (*characters
< 0x100) {
192 *usedByteLen
= __CFToASCIILatin1Fallback(*characters
, bytes
, maxByteLen
);
194 } else if (*characters
>= kSurrogateHighStart
&& *characters
<= kSurrogateLowEnd
) {
195 processCharLen
= (numChars
> 1 && *characters
<= kSurrogateLowStart
&& *(characters
+ 1) >= kSurrogateLowStart
&& *(characters
+ 1) <= kSurrogateLowEnd
? 2 : 1);
196 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceCharacterSet
)) {
198 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceAndNewlineCharacterSet
)) {
200 } else if (*characters
== 0x2026) { // ellipsis
201 if (0 == maxByteLen
) {
203 } else if (maxByteLen
> 2) {
204 memset(bytes
, '.', 3);
206 return processCharLen
;
208 } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharDecomposableCharacterSet
)) {
209 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
211 (void)CFUniCharDecomposeCharacter(*characters
, decomposed
, MAX_DECOMPOSED_LENGTH
);
212 if (*decomposed
< 0x80) {
213 byte
= (uint8_t)(*decomposed
);
215 UTF16Char theChar
= *decomposed
;
217 return __CFDefaultToBytesFallbackProc(&theChar
, 1, bytes
, maxByteLen
, usedByteLen
);
221 if (maxByteLen
) *bytes
= byte
;
222 *usedByteLen
= filledBytesLen
;
223 return processCharLen
;
226 static CFIndex
__CFDefaultToUnicodeFallbackProc(const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
227 if (maxCharLen
) *characters
= (UniChar
)'?';
/* Invoke the lossy fallback routines stored on a converter record.  `conv` is
 * parenthesized so an expression argument expands with the intended
 * precedence. */
#define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) ((conv)->toBytesFallback((chars), (numChars), (bytes), (max), (used)))
#define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) ((conv)->toUnicodeFallback((bytes), (numBytes), (chars), (max), (used)))

#define EXTRA_BASE (0x0F00)
237 /* Wrapper funcs for non-standard converters
239 static CFIndex
__CFToBytesCheapEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
240 CFIndex processedCharLen
= 0;
241 CFIndex length
= (maxByteLen
&& (maxByteLen
< numChars
) ? maxByteLen
: numChars
);
244 while (processedCharLen
< length
) {
245 if (!((CFStringEncodingCheapEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], &byte
)) break;
247 if (maxByteLen
) bytes
[processedCharLen
] = byte
;
251 *usedByteLen
= processedCharLen
;
252 return processedCharLen
;
255 static CFIndex
__CFToUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
256 CFIndex processedByteLen
= 0;
257 CFIndex length
= (maxCharLen
&& (maxCharLen
< numBytes
) ? maxCharLen
: numBytes
);
260 while (processedByteLen
< length
) {
261 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
263 if (maxCharLen
) characters
[processedByteLen
] = character
;
267 *usedCharLen
= processedByteLen
;
268 return processedByteLen
;
271 static CFIndex
__CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
272 CFIndex processedByteLen
= 0;
273 CFIndex theUsedCharLen
= 0;
274 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
277 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
279 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
280 if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break;
282 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
285 usedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
286 *usedCharLen
= theUsedCharLen
;
288 for (idx
= 0;idx
< usedLen
;idx
++) {
289 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
290 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
293 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
294 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
295 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
298 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
300 *(characters
++) = charBuffer
[idx
];
304 if (maxCharLen
) *(characters
++) = character
;
310 *usedCharLen
= theUsedCharLen
;
311 return processedByteLen
;
314 static CFIndex
__CFToBytesStandardEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
315 CFIndex processedCharLen
= 0;
321 while (numChars
&& (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
322 if (!(usedLen
= ((CFStringEncodingStandardEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
, numChars
, &byte
))) break;
324 if (maxByteLen
) bytes
[*usedByteLen
] = byte
;
326 characters
+= usedLen
;
328 processedCharLen
+= usedLen
;
331 return processedCharLen
;
334 static CFIndex
__CFToUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
335 CFIndex processedByteLen
= 0;
336 UniChar charBuffer
[__CFMaximumConvertedLength
];
341 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
342 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
347 if (*usedCharLen
+ usedLen
> maxCharLen
) break;
349 for (idx
= 0;idx
< usedLen
;idx
++) {
350 characters
[*usedCharLen
+ idx
] = charBuffer
[idx
];
353 *usedCharLen
+= usedLen
;
357 return processedByteLen
;
360 static CFIndex
__CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
361 CFIndex processedByteLen
= 0;
362 UniChar charBuffer
[__CFMaximumConvertedLength
];
363 UTF32Char decompBuffer
[MAX_DECOMPOSED_LENGTH
];
366 CFIndex idx
, decompIndex
;
367 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
368 CFIndex theUsedCharLen
= 0;
370 while ((processedByteLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
371 if (!(usedLen
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break;
373 for (idx
= 0;idx
< usedLen
;idx
++) {
374 if (CFUniCharIsDecomposableCharacter(charBuffer
[idx
], isHFSPlus
)) {
375 decompedLen
= CFUniCharDecomposeCharacter(charBuffer
[idx
], decompBuffer
, MAX_DECOMPOSED_LENGTH
);
376 *usedCharLen
= theUsedCharLen
;
378 for (decompIndex
= 0;decompIndex
< decompedLen
;decompIndex
++) {
379 if (decompBuffer
[decompIndex
] > 0xFFFF) { // Non-BMP
380 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
383 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
384 *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
;
385 *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
388 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
390 *(characters
++) = charBuffer
[idx
];
394 if (maxCharLen
) *(characters
++) = charBuffer
[idx
];
401 *usedCharLen
= theUsedCharLen
;
402 return processedByteLen
;
405 static CFIndex
__CFToBytesCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
406 CFIndex processedCharLen
= 0;
407 uint8_t byteBuffer
[__CFMaximumConvertedLength
];
412 while ((processedCharLen
< numChars
) && (!maxByteLen
|| (*usedByteLen
< maxByteLen
))) {
413 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], byteBuffer
))) break;
418 if (*usedByteLen
+ usedLen
> maxByteLen
) break;
420 for (idx
= 0;idx
<usedLen
;idx
++) {
421 bytes
[*usedByteLen
+ idx
] = byteBuffer
[idx
];
425 *usedByteLen
+= usedLen
;
429 return processedCharLen
;
432 static CFIndex
__CFToUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
433 CFIndex processedByteLen
= 0;
439 while (numBytes
&& (!maxCharLen
|| (*usedCharLen
< maxCharLen
))) {
440 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
442 if (maxCharLen
) *(characters
++) = character
;
444 processedByteLen
+= usedLen
;
449 return processedByteLen
;
452 static CFIndex
__CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
453 CFIndex processedByteLen
= 0;
454 UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
];
457 CFIndex decomposedLen
;
458 CFIndex theUsedCharLen
= 0;
459 bool isHFSPlus
= (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false);
461 while (numBytes
&& (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
462 if (!(usedLen
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break;
464 if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) {
467 decomposedLen
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
);
468 *usedCharLen
= theUsedCharLen
;
470 for (idx
= 0;idx
< decomposedLen
;idx
++) {
471 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP
472 if (theUsedCharLen
+ 2 > maxCharLen
) return processedByteLen
;
475 charBuffer
[idx
] = charBuffer
[idx
] - 0x10000;
476 *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
;
477 *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
;
480 if (theUsedCharLen
+ 1 > maxCharLen
) return processedByteLen
;
482 *(characters
++) = charBuffer
[idx
];
486 if (maxCharLen
) *(characters
++) = character
;
490 processedByteLen
+= usedLen
;
494 *usedCharLen
= theUsedCharLen
;
495 return processedByteLen
;
500 CF_INLINE _CFEncodingConverter
*__CFEncodingConverterFromDefinition(const CFStringEncodingConverter
*definition
, CFStringEncoding encoding
) {
501 #define NUM_OF_ENTRIES_CYCLE (10)
502 static uint32_t _currentIndex
= 0;
503 static uint32_t _allocatedSize
= 0;
504 static _CFEncodingConverter
*_allocatedEntries
= NULL
;
505 _CFEncodingConverter
*converter
;
508 if ((_currentIndex
+ 1) >= _allocatedSize
) {
511 _allocatedEntries
= NULL
;
513 if (_allocatedEntries
== NULL
) { // Not allocated yet
514 _allocatedEntries
= (_CFEncodingConverter
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(_CFEncodingConverter
) * NUM_OF_ENTRIES_CYCLE
, 0);
515 _allocatedSize
= NUM_OF_ENTRIES_CYCLE
;
516 converter
= &(_allocatedEntries
[_currentIndex
]);
518 converter
= &(_allocatedEntries
[++_currentIndex
]);
521 memset(converter
, 0, sizeof(_CFEncodingConverter
));
523 converter
->definition
= definition
;
525 switch (definition
->encodingClass
) {
526 case kCFStringEncodingConverterStandard
:
527 converter
->toBytes
= NULL
;
528 converter
->toUnicode
= NULL
;
529 converter
->toCanonicalUnicode
= NULL
;
532 case kCFStringEncodingConverterCheapEightBit
:
533 converter
->toBytes
= __CFToBytesCheapEightBitWrapper
;
534 converter
->toUnicode
= __CFToUnicodeCheapEightBitWrapper
;
535 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapEightBitWrapper
;
538 case kCFStringEncodingConverterStandardEightBit
:
539 converter
->toBytes
= __CFToBytesStandardEightBitWrapper
;
540 converter
->toUnicode
= __CFToUnicodeStandardEightBitWrapper
;
541 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeStandardEightBitWrapper
;
544 case kCFStringEncodingConverterCheapMultiByte
:
545 converter
->toBytes
= __CFToBytesCheapMultiByteWrapper
;
546 converter
->toUnicode
= __CFToUnicodeCheapMultiByteWrapper
;
547 converter
->toCanonicalUnicode
= __CFToCanonicalUnicodeCheapMultiByteWrapper
;
550 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
551 case kCFStringEncodingConverterICU
:
552 converter
->toBytes
= (_CFToBytesProc
)__CFStringEncodingGetICUName(encoding
);
556 case kCFStringEncodingConverterPlatformSpecific
:
559 default: // Shouln't be here
563 converter
->toBytesFallback
= (definition
->toBytesFallback
? definition
->toBytesFallback
: __CFDefaultToBytesFallbackProc
);
564 converter
->toUnicodeFallback
= (definition
->toUnicodeFallback
? definition
->toUnicodeFallback
: __CFDefaultToUnicodeFallbackProc
);
569 CF_INLINE
const CFStringEncodingConverter
*__CFStringEncodingConverterGetDefinition(CFStringEncoding encoding
) {
571 case kCFStringEncodingUTF8
:
572 return &__CFConverterUTF8
;
574 case kCFStringEncodingMacRoman
:
575 return &__CFConverterMacRoman
;
577 case kCFStringEncodingWindowsLatin1
:
578 return &__CFConverterWinLatin1
;
580 case kCFStringEncodingASCII
:
581 return &__CFConverterASCII
;
583 case kCFStringEncodingISOLatin1
:
584 return &__CFConverterISOLatin1
;
587 case kCFStringEncodingNextStepLatin
:
588 return &__CFConverterNextStepLatin
;
592 return __CFStringEncodingGetExternalConverter(encoding
);
/* Returns the cached converter record for `encoding`, building and caching
 * one from the encoding's definition on first use.  The result is NULL when
 * no definition is found for the encoding.
 *
 * NOTE(review): only the unlock calls are visible in this excerpt; the
 * matching lock acquisitions are presumed to precede each cache lookup --
 * confirm against the full file. */
596 static const _CFEncodingConverter
*__CFGetConverter(uint32_t encoding
) {
597 const _CFEncodingConverter
*converter
= NULL
;
/* Points at one of the fixed commonConverters slots when `encoding` is one of
 * the specially handled encodings; stays NULL for every other encoding. */
598 const _CFEncodingConverter
**commonConverterSlot
= NULL
;
599 static _CFEncodingConverter
*commonConverters
[3] = {NULL
, NULL
, NULL
}; // UTF8, MacRoman/WinLatin1, and the default encoding*
/* Cache for encodings without a fixed slot, keyed by the encoding value cast
 * to a pointer; created lazily below. */
600 static CFMutableDictionaryRef mappingTable
= NULL
;
601 static CFSpinLock_t lock
= CFSpinLockInit
;
604 case kCFStringEncodingUTF8
: commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[0]); break;
606 /* the switch here should avoid possible bootstrap issues in the default: case below when invoked from CFStringGetSystemEncoding() */
607 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
608 case kCFStringEncodingMacRoman
: commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[1]); break;
609 #elif DEPLOYMENT_TARGET_WINDOWS
610 case kCFStringEncodingWindowsLatin1
: commonConverterSlot
= (const _CFEncodingConverter
**)(&(commonConverters
[1])); break;
612 #warning This case must match __defaultEncoding value defined in CFString.c
613 case kCFStringEncodingISOLatin1
: commonConverterSlot
= (const _CFEncodingConverter
**)(&(commonConverters
[1])); break;
/* Any other encoding gets the third slot only when it is the system encoding. */
616 default: if (CFStringGetSystemEncoding() == encoding
) commonConverterSlot
= (const _CFEncodingConverter
**)&(commonConverters
[2]); break;
/* First lookup: fixed slot if there is one, otherwise the dictionary (if it
 * exists yet). */
620 converter
= ((NULL
== commonConverterSlot
) ? ((NULL
== mappingTable
) ? NULL
: (const _CFEncodingConverter
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
);
621 __CFSpinUnlock(&lock
);
623 if (NULL
== converter
) {
/* The definition is fetched after the unlock above, i.e. outside the locked
 * region. */
624 const CFStringEncodingConverter
*definition
= __CFStringEncodingConverterGetDefinition(encoding
);
626 if (NULL
!= definition
) {
/* Second lookup with the same expression as above, so a record cached in the
 * meantime is reused instead of being created twice. */
628 converter
= ((NULL
== commonConverterSlot
) ? ((NULL
== mappingTable
) ? NULL
: (const _CFEncodingConverter
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
);
630 if (NULL
== converter
) {
631 converter
= __CFEncodingConverterFromDefinition(definition
, encoding
);
633 if (NULL
== commonConverterSlot
) {
/* The dictionary is created with NULL key and value callbacks, so keys and
 * values are stored as plain pointer-sized values. */
634 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, NULL
);
636 CFDictionarySetValue(mappingTable
, (const void *)(uintptr_t)encoding
, converter
);
638 *commonConverterSlot
= converter
;
641 __CFSpinUnlock(&lock
);
/* Converts `numChars` UTF-16 units from `characters` into `encoding`.
 *
 * usedCharLen / usedByteLen are optional outputs (each is checked for NULL
 * before being written) receiving the number of input units consumed and
 * output bytes produced.  In the generic path a maxByteLen of 0 disables the
 * output limit and suppresses writes to `bytes` (every store is guarded by
 * `if (maxByteLen)`), i.e. the call only measures.
 *
 * The result is one of the kCFStringEncoding* status codes used below:
 * ConversionSuccess, InvalidInputStream, InsufficientOutputBufferLength or
 * ConverterUnavailable. */
650 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
, CFIndex
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex
*usedByteLen
) {
/* UTF-8 is handled inline, without going through __CFGetConverter. */
651 if (encoding
== kCFStringEncodingUTF8
) {
/* Cached pointer to the UTF-8 definition's toBytes routine (filled in below). */
652 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
653 CFIndex convertedCharLen
;
/* With either canonical flag the input is decomposed and written out as UTF-8
 * in one step by CFUniCharDecompose. */
657 if ((flags
& kCFStringEncodingUseCanonical
) || (flags
& kCFStringEncodingUseHFSPlusCanonical
)) {
658 (void)CFUniCharDecompose(characters
, numChars
, &convertedCharLen
, (void *)bytes
, maxByteLen
, &usedLen
, true, kCFUniCharUTF8Format
, (flags
& kCFStringEncodingUseHFSPlusCanonical
? true : false));
661 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
662 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
/* Note that the UTF-8 routine is called with flags 0, not the caller's flags. */
664 convertedCharLen
= __CFToUTF8(0, characters
, numChars
, bytes
, maxByteLen
, &usedLen
);
666 if (usedCharLen
) *usedCharLen
= convertedCharLen
;
667 if (usedByteLen
) *usedByteLen
= usedLen
;
669 if (convertedCharLen
== numChars
) {
670 return kCFStringEncodingConversionSuccess
;
/* Partial conversion with fewer than 10 bytes of room left: decide whether the
 * stop was caused by the input or by the buffer. */
671 } else if ((maxByteLen
> 0) && ((maxByteLen
- usedLen
) < 10)) { // could be filled outbuf
672 UTF16Char character
= characters
[convertedCharLen
];
/* A stray low surrogate, or a high surrogate that is last in the input or not
 * followed by a low surrogate, is reported as invalid input. */
674 if (((character
>= kSurrogateLowStart
) && (character
<= kSurrogateLowEnd
)) || ((character
>= kSurrogateHighStart
) && (character
<= kSurrogateHighEnd
) && ((1 == (numChars
- convertedCharLen
)) || (characters
[convertedCharLen
+ 1] < kSurrogateLowStart
) || (characters
[convertedCharLen
+ 1] > kSurrogateLowEnd
)))) return kCFStringEncodingInvalidInputStream
;
676 return kCFStringEncodingInsufficientOutputBufferLength
;
678 return kCFStringEncodingInvalidInputStream
;
/* Generic path: every other encoding goes through its converter record. */
681 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
/* Bytes reported by the most recent TO_BYTE / fallback / precompose call. */
683 CFIndex localUsedByteLen
;
/* Running total of output bytes. */
684 CFIndex theUsedByteLen
= 0;
685 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
686 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose
= NULL
;
687 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar
= NULL
;
689 if (!converter
) return kCFStringEncodingConverterUnavailable
;
/* Choose how combining characters are treated.  With SubstituteCombinings the
 * combining-character test is installed only for non-lossy conversion.
 * Otherwise it is always installed and, unless IgnoreCombinings is set, the
 * definition's precompose routine is used and ComposeCombinings is added to
 * the flags. */
691 if (flags
& kCFStringEncodingSubstituteCombinings
) {
692 if (!(flags
& kCFStringEncodingAllowLossyConversion
)) isValidCombiningChar
= converter
->definition
->isValidCombiningChar
;
694 isValidCombiningChar
= converter
->definition
->isValidCombiningChar
;
695 if (!(flags
& kCFStringEncodingIgnoreCombinings
)) {
696 toBytesPrecompose
= converter
->definition
->toBytesPrecompose
;
697 flags
|= kCFStringEncodingComposeCombinings
;
701 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
/* ICU-backed encodings: the record's toBytes field is passed on as a
 * const char * rather than being called. */
702 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToBytes((const char *)converter
->toBytes
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
);
705 /* Platform converter */
706 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformUnicodeToBytes(encoding
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
);
/* Main loop: convert as much as possible; whenever the converter stops short
 * of the end of the input, work out why and recover if the flags allow it. */
708 while ((usedLen
< numChars
) && (!maxByteLen
|| (theUsedByteLen
< maxByteLen
))) {
709 if ((usedLen
+= TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) < numChars
) {
/* Case 1: the converter stopped at a combining character. */
712 if (isValidCombiningChar
&& (usedLen
> 0) && isValidCombiningChar(characters
[usedLen
])) {
713 if (toBytesPrecompose
) {
714 CFIndex localUsedLen
= usedLen
;
/* Step back to the base character preceding the run of combining characters.
 * NOTE(review): this backward scan has no visible lower bound on usedLen --
 * confirm that a non-combining character is always found before index 0. */
716 while (isValidCombiningChar(characters
[--usedLen
]));
717 theUsedByteLen
+= localUsedByteLen
;
/* For multi-byte encodings, measure how many bytes the re-scanned characters
 * took and remove them from the running total. */
718 if (converter
->definition
->maxBytesPerChar
> 1) {
719 TO_BYTE(converter
, flags
, characters
+ usedLen
, localUsedLen
- usedLen
, NULL
, 0, &localUsedByteLen
);
720 theUsedByteLen
-= localUsedByteLen
;
/* Try to convert base + combining characters as one precomposed unit. */
724 if ((localUsedLen
= toBytesPrecompose(flags
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
)) > 0) {
725 usedLen
+= localUsedLen
;
726 if ((usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
])) { // There is a non-base char not combined remaining
727 theUsedByteLen
+= localUsedByteLen
;
728 theResult
= kCFStringEncodingInvalidInputStream
;
731 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
732 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
/* NOTE(review): unlike the IgnoreCombinings scan further down, this forward
 * scan does not compare usedLen against numChars before indexing
 * `characters`; confirm it cannot read past the end of the input. */
735 while (isValidCombiningChar(characters
[++usedLen
]));
736 localUsedByteLen
= 1;
737 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
740 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
743 theResult
= kCFStringEncodingInvalidInputStream
;
/* No precompose routine: first rule out a full output buffer.  The probe
 * re-runs the converter in measuring mode (NULL, 0) on the remaining input. */
746 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
747 theUsedByteLen
+= localUsedByteLen
;
748 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
/* IgnoreCombinings: skip the whole run of combining characters. */
750 } else if (flags
& kCFStringEncodingIgnoreCombinings
) {
751 while ((++usedLen
< numChars
) && isValidCombiningChar(characters
[usedLen
]));
753 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
755 theUsedByteLen
+= localUsedByteLen
;
758 localUsedByteLen
= 1;
759 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
761 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
/* Case 2: not a combining character, and the output buffer is (or would be)
 * full. */
764 } else if (maxByteLen
&& ((maxByteLen
== theUsedByteLen
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up
765 theUsedByteLen
+= localUsedByteLen
;
/* Lossy conversion without an explicit lossy byte: consume trailing characters
 * whose fallback produces no output, so they do not trigger a buffer error. */
767 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
768 CFIndex localUsedLen
;
770 localUsedByteLen
= 0;
771 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
773 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
/* Case 3: unconvertible character with lossy conversion allowed -- emit the
 * lossy byte from the flags or the fallback's output. */
775 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
776 uint8_t lossyByte
= CFStringEncodingMaskToLossyByte(flags
);
778 theUsedByteLen
+= localUsedByteLen
;
781 localUsedByteLen
= 1;
782 if (maxByteLen
) *(bytes
+ theUsedByteLen
) = lossyByte
;
784 usedLen
+= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, bytes
+ theUsedByteLen
, (maxByteLen
? maxByteLen
- theUsedByteLen
: 0), &localUsedByteLen
);
/* Case 4: unconvertible character and lossy conversion not allowed. */
787 theUsedByteLen
+= localUsedByteLen
;
788 theResult
= kCFStringEncodingInvalidInputStream
;
792 theUsedByteLen
+= localUsedByteLen
;
/* Input remains although no error was recorded: the loop ended on the output
 * limit.  Apply the same zero-output fallback skipping as above before
 * reporting a short buffer. */
795 if (usedLen
< numChars
&& maxByteLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
796 if (flags
& kCFStringEncodingAllowLossyConversion
&& !CFStringEncodingMaskToLossyByte(flags
)) {
797 CFIndex localUsedLen
;
799 localUsedByteLen
= 0;
800 while ((usedLen
< numChars
) && !localUsedByteLen
&& (localUsedLen
= TO_BYTE_FALLBACK(converter
, characters
+ usedLen
, numChars
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen
+= localUsedLen
;
802 if (usedLen
< numChars
) theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
804 if (usedByteLen
) *usedByteLen
= theUsedByteLen
;
805 if (usedCharLen
) *usedCharLen
= usedLen
;
/* Converts `numBytes` bytes in `encoding` into UTF-16 units.
 *
 * usedByteLen / usedCharLen are optional outputs (each is checked for NULL
 * before being written) receiving the number of input bytes consumed and
 * UniChars produced.  A maxCharLen of 0 disables the output limit in the loop
 * below and is passed to the converter as a capacity of 0.
 *
 * The result is one of the kCFStringEncoding* status codes used below:
 * ConversionSuccess, InvalidInputStream, InsufficientOutputBufferLength or
 * ConverterUnavailable. */
811 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex
*usedByteLen
, UniChar
*characters
, CFIndex maxCharLen
, CFIndex
*usedCharLen
) {
812 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
/* Running total of UniChars produced. */
814 CFIndex theUsedCharLen
= 0;
/* UniChars reported by the most recent TO_UNICODE / fallback call. */
815 CFIndex localUsedCharLen
;
816 uint32_t theResult
= kCFStringEncodingConversionSuccess
;
818 if (!converter
) return kCFStringEncodingConverterUnavailable
;
820 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
/* ICU-backed encodings: the record's toBytes field is passed on as a
 * const char * rather than being called. */
821 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToUnicode((const char *)converter
->toBytes
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
);
824 /* Platform converter */
825 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformBytesToUnicode(encoding
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
);
/* Main loop: convert as much as possible; whenever the converter stops short
 * of the end of the input, work out why. */
827 while ((usedLen
< numBytes
) && (!maxCharLen
|| (theUsedCharLen
< maxCharLen
))) {
828 if ((usedLen
+= TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
)) < numBytes
) {
829 CFIndex tempUsedCharLen
;
/* The buffer counts as full when it is exactly used up, or -- when a
 * canonical flag is set or exactly one slot is left -- when a measuring-mode
 * retry (NULL, 0) on the remaining input makes progress. */
831 if (maxCharLen
&& ((maxCharLen
== theUsedCharLen
+ localUsedCharLen
) || (((flags
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
)) || (maxCharLen
== theUsedCharLen
+ localUsedCharLen
+ 1)) && TO_UNICODE(converter
, flags
, bytes
+ usedLen
, numBytes
- usedLen
, NULL
, 0, &tempUsedCharLen
)))) { // buffer was filled up
832 theUsedCharLen
+= localUsedCharLen
;
833 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
/* Undecodable input with lossy conversion allowed: let the fallback routine
 * consume it and supply the replacement output. */
835 } else if (flags
& kCFStringEncodingAllowLossyConversion
) {
836 theUsedCharLen
+= localUsedCharLen
;
837 usedLen
+= TO_UNICODE_FALLBACK(converter
, bytes
+ usedLen
, numBytes
- usedLen
, characters
+ theUsedCharLen
, (maxCharLen
? maxCharLen
- theUsedCharLen
: 0), &localUsedCharLen
);
/* Undecodable input and lossy conversion not allowed. */
839 theUsedCharLen
+= localUsedCharLen
;
840 theResult
= kCFStringEncodingInvalidInputStream
;
844 theUsedCharLen
+= localUsedCharLen
;
/* Input remains although no error was recorded: the loop ended on the output
 * limit. */
847 if (usedLen
< numBytes
&& maxCharLen
&& theResult
== kCFStringEncodingConversionSuccess
) {
848 theResult
= kCFStringEncodingInsufficientOutputBufferLength
;
850 if (usedCharLen
) *usedCharLen
= theUsedCharLen
;
851 if (usedByteLen
) *usedByteLen
= usedLen
;
856 CF_PRIVATE
bool CFStringEncodingIsValidEncoding(uint32_t encoding
) {
857 return (CFStringEncodingGetConverter(encoding
) ? true : false);
860 CF_PRIVATE CFIndex
CFStringEncodingCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) {
861 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
864 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
865 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUCharLength((const char *)converter
->toBytes
, flags
, bytes
, numBytes
);
868 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformCharLengthForBytes(encoding
, flags
, bytes
, numBytes
);
870 if (1 == converter
->definition
->maxBytesPerChar
) return numBytes
;
872 if (NULL
== converter
->definition
->toUnicodeLen
) {
873 CFIndex usedByteLen
= 0;
874 CFIndex totalLength
= 0;
877 while (numBytes
> 0) {
878 usedByteLen
= TO_UNICODE(converter
, flags
, bytes
, numBytes
, NULL
, 0, &usedCharLen
);
880 bytes
+= usedByteLen
;
881 numBytes
-= usedByteLen
;
882 totalLength
+= usedCharLen
;
885 if (0 == (flags
& kCFStringEncodingAllowLossyConversion
)) return 0;
887 usedByteLen
= TO_UNICODE_FALLBACK(converter
, bytes
, numBytes
, NULL
, 0, &usedCharLen
);
889 bytes
+= usedByteLen
;
890 numBytes
-= usedByteLen
;
891 totalLength
+= usedCharLen
;
897 return converter
->definition
->toUnicodeLen(flags
, bytes
, numBytes
);
904 CF_PRIVATE CFIndex
CFStringEncodingByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar
*characters
, CFIndex numChars
) {
905 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
908 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
909 if (kCFStringEncodingConverterICU
== converter
->definition
->encodingClass
) return __CFStringEncodingICUByteLength((const char *)converter
->toBytes
, flags
, characters
, numChars
);
912 if (kCFStringEncodingConverterPlatformSpecific
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformByteLengthForCharacters(encoding
, flags
, characters
, numChars
);
914 if (1 == converter
->definition
->maxBytesPerChar
) return numChars
;
916 if (NULL
== converter
->definition
->toBytesLen
) {
919 return ((kCFStringEncodingConversionSuccess
== CFStringEncodingUnicodeToBytes(encoding
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
)) ? usedByteLen
: 0);
921 return converter
->definition
->toBytesLen(flags
, characters
, numChars
);
928 void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding
, CFStringEncodingToBytesFallbackProc toBytes
, CFStringEncodingToUnicodeFallbackProc toUnicode
) {
929 _CFEncodingConverter
*converter
= (_CFEncodingConverter
*)__CFGetConverter(encoding
);
931 if (NULL
!= converter
) {
932 const CFStringEncodingConverter
*body
= CFStringEncodingGetConverter(encoding
);
934 converter
->toBytesFallback
= ((NULL
== toBytes
) ? ((NULL
== body
) ? __CFDefaultToBytesFallbackProc
: body
->toBytesFallback
) : toBytes
);
935 converter
->toUnicodeFallback
= ((NULL
== toUnicode
) ? ((NULL
== body
) ? __CFDefaultToUnicodeFallbackProc
: body
->toUnicodeFallback
) : toUnicode
);
939 CF_PRIVATE
const CFStringEncodingConverter
*CFStringEncodingGetConverter(uint32_t encoding
) {
940 const _CFEncodingConverter
*converter
= __CFGetConverter(encoding
);
942 return ((NULL
== converter
) ? NULL
: converter
->definition
);
945 static const CFStringEncoding __CFBuiltinEncodings
[] = {
946 kCFStringEncodingMacRoman
,
947 kCFStringEncodingWindowsLatin1
,
948 kCFStringEncodingISOLatin1
,
949 kCFStringEncodingNextStepLatin
,
950 kCFStringEncodingASCII
,
951 kCFStringEncodingUTF8
,
952 /* These seven are available only in CFString-level */
953 kCFStringEncodingNonLossyASCII
,
955 kCFStringEncodingUTF16
,
956 kCFStringEncodingUTF16BE
,
957 kCFStringEncodingUTF16LE
,
959 kCFStringEncodingUTF32
,
960 kCFStringEncodingUTF32BE
,
961 kCFStringEncodingUTF32LE
,
963 kCFStringEncodingInvalidId
,
966 static CFComparisonResult
__CFStringEncodingComparator(const void *v1
, const void *v2
, void *context
) {
967 CFComparisonResult val1
= (*(const CFStringEncoding
*)v1
) & 0xFFFF;
968 CFComparisonResult val2
= (*(const CFStringEncoding
*)v2
) & 0xFFFF;
970 return ((val1
== val2
) ? ((CFComparisonResult
)(*(const CFStringEncoding
*)v1
) - (CFComparisonResult
)(*(const CFStringEncoding
*)v2
)) : val1
- val2
);
973 static void __CFStringEncodingFliterDupes(CFStringEncoding
*encodings
, CFIndex numSlots
) {
974 CFStringEncoding last
= kCFStringEncodingInvalidId
;
975 const CFStringEncoding
*limitEncodings
= encodings
+ numSlots
;
977 while (encodings
< limitEncodings
) {
978 if (last
== *encodings
) {
979 if ((encodings
+ 1) < limitEncodings
) memmove(encodings
, encodings
+ 1, sizeof(CFStringEncoding
) * (limitEncodings
- encodings
- 1));
982 last
= *(encodings
++);
987 CF_PRIVATE
const CFStringEncoding
*CFStringEncodingListOfAvailableEncodings(void) {
988 static const CFStringEncoding
*encodings
= NULL
;
990 if (NULL
== encodings
) {
991 CFStringEncoding
*list
= (CFStringEncoding
*)__CFBuiltinEncodings
;
992 CFIndex numICUConverters
= 0, numPlatformConverters
= 0;
993 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
994 CFStringEncoding
*icuConverters
= __CFStringEncodingCreateICUEncodings(NULL
, &numICUConverters
);
996 CFStringEncoding
*icuConverters
= NULL
;
998 CFStringEncoding
*platformConverters
= __CFStringEncodingCreateListOfAvailablePlatformConverters(NULL
, &numPlatformConverters
);
1000 if ((NULL
!= icuConverters
) || (NULL
!= platformConverters
)) {
1001 CFIndex numSlots
= (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters
+ numPlatformConverters
;
1003 list
= (CFStringEncoding
*)CFAllocatorAllocate(NULL
, sizeof(CFStringEncoding
) * numSlots
, 0);
1005 memcpy(list
, __CFBuiltinEncodings
, sizeof(__CFBuiltinEncodings
));
1007 if (NULL
!= icuConverters
) {
1008 memcpy(list
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)), icuConverters
, sizeof(CFStringEncoding
) * numICUConverters
);
1009 CFAllocatorDeallocate(NULL
, icuConverters
);
1012 if (NULL
!= platformConverters
) {
1013 memcpy(list
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters
, platformConverters
, sizeof(CFStringEncoding
) * numPlatformConverters
);
1014 CFAllocatorDeallocate(NULL
, platformConverters
);
1017 CFQSortArray(list
, numSlots
, sizeof(CFStringEncoding
), (CFComparatorFunction
)__CFStringEncodingComparator
, NULL
);
1018 __CFStringEncodingFliterDupes(list
, numSlots
);
1020 if (!OSAtomicCompareAndSwapPtrBarrier(NULL
, list
, (void * volatile *)&encodings
) && (list
!= __CFBuiltinEncodings
)) CFAllocatorDeallocate(NULL
, list
);
1029 #undef kSurrogateHighStart
1030 #undef kSurrogateHighEnd
1031 #undef kSurrogateLowStart
1032 #undef kSurrogateLowEnd
1033 #undef TO_BYTE_FALLBACK
1034 #undef TO_UNICODE_FALLBACK
1036 #undef NUM_OF_ENTRIES_CYCLE