2  * Copyright (c) 2013 Apple Inc. All rights reserved. 
   4  * @APPLE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. Please obtain a copy of the License at 
  10  * http://www.opensource.apple.com/apsl/ and read it before using this 
  13  * The Original Code and all software distributed under the License are 
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  18  * Please see the License for the specific language governing rights and 
  19  * limitations under the License. 
  21  * @APPLE_LICENSE_HEADER_END@ 
  24 /*      CFStringEncodingConverter.c 
  25         Copyright (c) 1998-2013, Apple Inc. All rights reserved. 
  26         Responsibility: Aki Inoue 
  29 #include "CFInternal.h" 
  30 #include <CoreFoundation/CFArray.h> 
  31 #include <CoreFoundation/CFDictionary.h> 
  32 #include "CFICUConverters.h" 
  33 #include <CoreFoundation/CFUniChar.h> 
  34 #include <CoreFoundation/CFPriv.h> 
  35 #include "CFUnicodeDecomposition.h" 
  36 #include "CFStringEncodingConverterExt.h" 
  37 #include "CFStringEncodingConverterPriv.h" 
  /* Signature of the internal Unicode-to-bytes entry point kept in the toBytes slot of
   * _CFEncodingConverter.  Unlike the definition-level procs it receives the converter
   * itself as an opaque first argument, followed by the conversion flags, the UniChar
   * input (characters/numChars) and the output buffer (bytes/maxByteLen).  The number
   * of bytes produced is reported through usedByteLen; the wrappers in this file return
   * the count of input characters they consumed. */
  40 typedef CFIndex (*_CFToBytesProc
)(const void *converter
, uint32_t flags
, const UniChar 
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex 
*usedByteLen
); 
  /* Signature of the internal bytes-to-Unicode entry point kept in the toUnicode and
   * toCanonicalUnicode slots of _CFEncodingConverter.  Receives the converter as an
   * opaque first argument, then the conversion flags, the byte input (bytes/numBytes)
   * and the UniChar output buffer (characters/maxCharLen).  The number of UniChars
   * produced is reported through usedCharLen; the wrappers in this file return the
   * count of input bytes they consumed. */
  41 typedef CFIndex (*_CFToUnicodeProc
)(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
); 
  /* Members of the per-encoding converter record.
   * NOTE(review): the line that opens this struct declaration is missing from this
   * excerpt; only the members and the closing typedef name are visible. */
  /* The converter definition this record was built from. */
  44     const CFStringEncodingConverter 
*definition
; 
  /* Wrapper used for Unicode-to-bytes conversion, or NULL when the definition's own
   * toBytes is to be called directly (see TO_BYTE).  For the ICU converter class this
   * slot instead carries the value returned by __CFStringEncodingGetICUName(), cast to
   * the proc type and read back as a const char *. */
  45     _CFToBytesProc toBytes
; 
  /* Wrapper used for bytes-to-Unicode conversion, or NULL when the definition's own
   * toUnicode is to be called directly (see TO_UNICODE). */
  46     _CFToUnicodeProc toUnicode
; 
  /* Wrapper used instead of toUnicode when one of the canonical-decomposition flags is
   * set (see TO_UNICODE). */
  47     _CFToUnicodeProc toCanonicalUnicode
; 
  /* Fallback for characters that cannot be converted to bytes: the definition's own
   * fallback when it has one, otherwise __CFDefaultToBytesFallbackProc. */
  48     CFStringEncodingToBytesFallbackProc toBytesFallback
; 
  /* Fallback for bytes that cannot be converted to Unicode: the definition's own
   * fallback when it has one, otherwise __CFDefaultToUnicodeFallbackProc. */
  49     CFStringEncodingToUnicodeFallbackProc toUnicodeFallback
; 
  50 } _CFEncodingConverter
; 
  /* Converts UniChars to bytes through `conv` (a const _CFEncodingConverter *).
   * When the converter has a toBytes wrapper, the wrapper is called with the converter
   * as its first argument; otherwise the definition's own toBytes is called directly as
   * a CFStringEncodingToBytesProc (which takes no converter argument).  Evaluates to the
   * callee's result.
   * Every macro parameter is parenthesized in the expansion so that arbitrary
   * expressions are safe as arguments.  `conv` is still evaluated more than once, so it
   * must not have side effects. */
  #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) \
      ((conv)->toBytes \
          ? (conv)->toBytes((conv),(flags),(chars),(numChars),(bytes),(max),(used)) \
          : ((CFStringEncodingToBytesProc)(conv)->definition->toBytes)((flags),(chars),(numChars),(bytes),(max),(used)))
  /* Converts bytes to UniChars through `conv` (a const _CFEncodingConverter *).
   * When the converter has a toUnicode wrapper, either toCanonicalUnicode (if `flags`
   * contains kCFStringEncodingUseCanonical or kCFStringEncodingUseHFSPlusCanonical) or
   * toUnicode is called with the converter as its first argument; otherwise the
   * definition's own toUnicode is called directly as a CFStringEncodingToUnicodeProc.
   * Evaluates to the callee's result.
   * Every macro parameter is parenthesized in the expansion; in particular `flags` is
   * wrapped before being masked, so an argument such as `a | b` is tested as a whole.
   * `conv` and `flags` are still evaluated more than once, so they must not have side
   * effects. */
  #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) \
      ((conv)->toUnicode \
          ? (((flags) & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) \
              ? (conv)->toCanonicalUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used)) \
              : (conv)->toUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used))) \
          : ((CFStringEncodingToUnicodeProc)(conv)->definition->toUnicode)((flags),(bytes),(numBytes),(chars),(max),(used)))
  /* ASCII line feed. */
  57 #define ASCIINewLine 0x0a 
  /* UTF-16 surrogate code unit ranges: high (lead) surrogates are
   * kSurrogateHighStart..kSurrogateHighEnd, low (trail) surrogates are
   * kSurrogateLowStart..kSurrogateLowEnd. */
  58 #define kSurrogateHighStart 0xD800 
  59 #define kSurrogateHighEnd 0xDBFF 
  60 #define kSurrogateLowStart 0xDC00 
  61 #define kSurrogateLowEnd 0xDFFF 
  /* Capacity, in elements, of the on-stack scratch buffers that the wrapper functions
   * in this file hand to a definition's per-unit conversion callback.
   * Declared as an enumeration constant rather than a `static const` object: in C a
   * const-qualified object is not an integer constant expression, so arrays sized by it
   * would formally be variable-length arrays. */
  enum { __CFMaximumConvertedLength = 20 };
  65 /* Mapping 160..255 (0xA0..0xFF) to lossy ASCII 
  68     unsigned char chars
[4]; 
  69 } _toLossyASCIITable
[] = { 
  70     {{' ', 0, 0, 0}}, // NO-BREAK SPACE 
  71     {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK 
  72     {{'c', 0, 0, 0}}, // CENT SIGN 
  73     {{'L', 0, 0, 0}}, // POUND SIGN 
  74     {{'$', 0, 0, 0}}, // CURRENCY SIGN 
  75     {{'Y', 0, 0, 0}}, // YEN SIGN 
  76     {{'|', 0, 0, 0}}, // BROKEN BAR 
  77     {{0, 0, 0, 0}}, // SECTION SIGN 
  78     {{0, 0, 0, 0}}, // DIAERESIS 
  79     {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN 
  80     {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR 
  81     {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
  82     {{0, 0, 0, 0}}, // NOT SIGN 
  83     {{'-', 0, 0, 0}}, // SOFT HYPHEN 
  84     {{'(', 'R', ')', 0}}, // REGISTERED SIGN 
  85     {{0, 0, 0, 0}}, // MACRON 
  86     {{0, 0, 0, 0}}, // DEGREE SIGN 
  87     {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN 
  88     {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO 
  89     {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE 
  90     {{0, 0, 0, 0}}, // ACUTE ACCENT 
  91     {{0, 0, 0, 0}}, // MICRO SIGN 
  92     {{0, 0, 0, 0}}, // PILCROW SIGN 
  93     {{0, 0, 0, 0}}, // MIDDLE DOT 
  94     {{0, 0, 0, 0}}, // CEDILLA 
  95     {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE 
  96     {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR 
  97     {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 
  98     {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER 
  99     {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF 
 100     {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS 
 101     {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK 
 102     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE 
 103     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE 
 104     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX 
 105     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE 
 106     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS 
 107     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE 
 108     {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE 
 109     {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA 
 110     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE 
 111     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE 
 112     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX 
 113     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS 
 114     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE 
 115     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE 
 116     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX 
 117     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS 
 118     {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic) 
 119     {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE 
 120     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE 
 121     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE 
 122     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX 
 123     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE 
 124     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS 
 125     {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN 
 126     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE 
 127     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE 
 128     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE 
 129     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX 
 130     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS 
 131     {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE 
 132     {{'t', 'h', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic) 
 133     {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German) 
 134     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE 
 135     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE 
 136     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX 
 137     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE 
 138     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS 
 139     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE 
 140     {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE 
 141     {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA 
 142     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE 
 143     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE 
 144     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX 
 145     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS 
 146     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE 
 147     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE 
 148     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX 
 149     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS 
 150     {{'T', 'H', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic) 
 151     {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE 
 152     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE 
 153     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE 
 154     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX 
 155     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE 
 156     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS 
 157     {{'/', 0, 0, 0}}, // DIVISION SIGN 
 158     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE 
 159     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE 
 160     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE 
 161     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX 
 162     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS 
 163     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE 
 164     {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic) 
 165     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS 
 /* Lossy ASCII replacement for a single character via _toLossyASCIITable.
  * The table row is located with (character - 0xA0) and each row is 4 bytes wide, so
  * callers must pass a character that falls inside the table.  At most 4 bytes are
  * visited, or fewer when maxByteLen is non-zero and smaller than 4; bytes are stored
  * into `bytes` only when maxByteLen is non-zero.
  * NOTE(review): part of the loop body and the return statement are missing from this
  * excerpt, so how numBytes is accumulated and returned is not documented here. */
 168 CF_INLINE CFIndex 
__CFToASCIILatin1Fallback(UniChar character
, uint8_t *bytes
, CFIndex maxByteLen
) { 
 /* Start of the 4-byte table row for this character. */
 169     const uint8_t *losChars 
= (const uint8_t*)_toLossyASCIITable 
+ (character 
- 0xA0) * sizeof(uint8_t[4]); 
 170     CFIndex numBytes 
= 0; 
 /* max is maxByteLen when that is non-zero and below 4, otherwise 4. */
 171     CFIndex idx
, max 
= (maxByteLen 
&& (maxByteLen 
< 4) ? maxByteLen 
: 4); 
 173     for (idx 
= 0;idx 
< max
;idx
++) { 
 /* A zero maxByteLen means no output buffer: nothing is stored. */
 175             if (maxByteLen
) bytes
[idx
] = losChars
[idx
]; 
 /* Default fallback used when a character cannot be converted to bytes and the
  * converter definition supplies no fallback of its own.
  * Looks only at the first character (plus the following one for surrogate pairs) and
  * chooses a replacement by category: 0x80..0x9F, 0xA0..0xFF (lossy ASCII table),
  * surrogates, whitespace, whitespace-and-newline, the ellipsis U+2026, and
  * decomposable characters.  On the common exit path one byte is stored (only when
  * maxByteLen is non-zero), *usedByteLen is set to filledBytesLen and the number of
  * input characters consumed (processCharLen) is returned.
  * NOTE(review): several lines of this function, including the declaration of `byte`
  * and the bodies of some branches, are missing from this excerpt. */
 185 static CFIndex 
__CFDefaultToBytesFallbackProc(const UniChar 
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex 
*usedByteLen
) { 
 186     CFIndex processCharLen 
= 1, filledBytesLen 
= 1; 
 189     if (*characters 
< 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range 
 190         byte 
= (uint8_t)(*characters 
- 0x80); 
 191     } else if (*characters 
< 0x100) { 
 /* 0xA0..0xFF: delegate to the lossy ASCII table lookup. */
 192         *usedByteLen 
= __CFToASCIILatin1Fallback(*characters
, bytes
, maxByteLen
); 
 194     } else if (*characters 
>= kSurrogateHighStart 
&& *characters 
<= kSurrogateLowEnd
) { 
 /* Consume two characters when a second character is available and lies in the low
  * surrogate range, otherwise one.
  * NOTE(review): the lead test is `<= kSurrogateLowStart`, which also accepts 0xDC00,
  * itself a low surrogate; confirm whether `<= kSurrogateHighEnd` was intended. */
 195         processCharLen 
= (numChars 
> 1 && *characters 
<= kSurrogateLowStart 
&& *(characters 
+ 1) >= kSurrogateLowStart 
&& *(characters 
+ 1) <= kSurrogateLowEnd 
? 2 : 1); 
 196     } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceCharacterSet
)) { 
 198     } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharWhitespaceAndNewlineCharacterSet
)) { 
 200     } else if (*characters 
== 0x2026) { // ellipsis 
 201         if (0 == maxByteLen
) { 
 203         } else if (maxByteLen 
> 2) { 
 /* Room for three bytes: write "..." and return from inside this branch. */
 204             memset(bytes
, '.', 3); 
 206             return processCharLen
; 
 208     } else if (CFUniCharIsMemberOf(*characters
, kCFUniCharDecomposableCharacterSet
)) { 
 209         UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
]; 
 /* Only the first element of the decomposition is used below. */
 211         (void)CFUniCharDecomposeCharacter(*characters
, decomposed
, MAX_DECOMPOSED_LENGTH
); 
 212         if (*decomposed 
< 0x80) { 
 213             byte 
= (uint8_t)(*decomposed
); 
 /* Otherwise retry the fallback on the first decomposed element, narrowed to a
  * UTF16Char. */
 215             UTF16Char theChar 
= *decomposed
; 
 217             return __CFDefaultToBytesFallbackProc(&theChar
, 1, bytes
, maxByteLen
, usedByteLen
); 
 /* Common exit: store the byte only when an output buffer was supplied. */
 221     if (maxByteLen
) *bytes 
= byte
; 
 222     *usedByteLen 
= filledBytesLen
; 
 223     return processCharLen
; 
 /* Default fallback used when bytes cannot be converted to Unicode and the converter
  * definition supplies no fallback of its own.  When maxCharLen is non-zero, stores
  * '?' as the replacement character in *characters.
  * NOTE(review): the rest of the body (including what is written to *usedCharLen and
  * the return value) is missing from this excerpt. */
 226 static CFIndex 
__CFDefaultToUnicodeFallbackProc(const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 227     if (maxCharLen
) *characters 
= (UniChar
)'?'; 
 /* Invoke the to-bytes / to-Unicode fallback proc stored on `conv` (a
  * const _CFEncodingConverter *).  Unlike TO_BYTE / TO_UNICODE, the fallback procs take
  * neither the converter nor the flags.  Every macro parameter is parenthesized in the
  * expansion so that arbitrary expressions are safe as arguments. */
 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) ((conv)->toBytesFallback((chars),(numChars),(bytes),(max),(used)))
 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) ((conv)->toUnicodeFallback((bytes),(numBytes),(chars),(max),(used)))

 /* NOTE(review): not referenced in the visible part of this file; purpose to be
  * confirmed against its users. */
 #define EXTRA_BASE (0x0F00)
 237 /* Wrapper funcs for non-standard converters 
 239 static CFIndex 
__CFToBytesCheapEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar 
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex 
*usedByteLen
) { 
 240     CFIndex processedCharLen 
= 0; 
 241     CFIndex length 
= (maxByteLen 
&& (maxByteLen 
< numChars
) ? maxByteLen 
: numChars
); 
 244     while (processedCharLen 
< length
) { 
 245         if (!((CFStringEncodingCheapEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], &byte
)) break; 
 247         if (maxByteLen
) bytes
[processedCharLen
] = byte
; 
 251     *usedByteLen 
= processedCharLen
; 
 252     return processedCharLen
; 
 255 static CFIndex 
__CFToUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 256     CFIndex processedByteLen 
= 0; 
 257     CFIndex length 
= (maxCharLen 
&& (maxCharLen 
< numBytes
) ? maxCharLen 
: numBytes
); 
 260     while (processedByteLen 
< length
) { 
 261         if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break; 
 263         if (maxCharLen
) characters
[processedByteLen
] = character
; 
 267     *usedCharLen 
= processedByteLen
; 
 268     return processedByteLen
; 
 271 static CFIndex 
__CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 272     CFIndex processedByteLen 
= 0; 
 273     CFIndex theUsedCharLen 
= 0; 
 274     UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
]; 
 277     bool isHFSPlus 
= (flags 
& kCFStringEncodingUseHFSPlusCanonical 
? true : false); 
 279     while ((processedByteLen 
< numBytes
) && (!maxCharLen 
|| (theUsedCharLen 
< maxCharLen
))) { 
 280         if (!((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], &character
)) break; 
 282         if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) { 
 285             usedLen 
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
); 
 286             *usedCharLen 
= theUsedCharLen
; 
 288             for (idx 
= 0;idx 
< usedLen
;idx
++) { 
 289                 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP 
 290                     if (theUsedCharLen 
+ 2 > maxCharLen
)  return processedByteLen
; 
 293                         charBuffer
[idx
] = charBuffer
[idx
] - 0x10000; 
 294                         *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
; 
 295                         *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
; 
 298                     if (theUsedCharLen 
+ 1 > maxCharLen
)  return processedByteLen
; 
 300                     *(characters
++) = charBuffer
[idx
]; 
 304             if (maxCharLen
) *(characters
++) = character
; 
 310     *usedCharLen 
= theUsedCharLen
; 
 311     return processedByteLen
; 
 314 static CFIndex 
__CFToBytesStandardEightBitWrapper(const void *converter
, uint32_t flags
, const UniChar 
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex 
*usedByteLen
) { 
 315     CFIndex processedCharLen 
= 0; 
 321     while (numChars 
&& (!maxByteLen 
|| (*usedByteLen 
< maxByteLen
))) { 
 322         if (!(usedLen 
= ((CFStringEncodingStandardEightBitToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
, numChars
, &byte
))) break; 
 324         if (maxByteLen
) bytes
[*usedByteLen
] = byte
; 
 326         characters 
+= usedLen
; 
 328         processedCharLen 
+= usedLen
; 
 331     return processedCharLen
; 
 334 static CFIndex 
__CFToUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 335     CFIndex processedByteLen 
= 0; 
 336     UniChar charBuffer
[__CFMaximumConvertedLength
]; 
 341     while ((processedByteLen 
< numBytes
) && (!maxCharLen 
|| (*usedCharLen 
< maxCharLen
))) { 
 342         if (!(usedLen 
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break; 
 347             if (*usedCharLen 
+ usedLen 
> maxCharLen
) break; 
 349             for (idx 
= 0;idx 
< usedLen
;idx
++) { 
 350                 characters
[*usedCharLen 
+ idx
] = charBuffer
[idx
]; 
 353         *usedCharLen 
+= usedLen
; 
 357     return processedByteLen
; 
 /* toCanonicalUnicode wrapper installed for kCFStringEncodingConverterStandardEightBit
  * definitions.  Converts one input byte at a time through the definition's toUnicode
  * callback into charBuffer, then, for each converted UniChar, either copies it to the
  * output or, if it is decomposable (HFS+ rules when
  * kCFStringEncodingUseHFSPlusCanonical is set), decomposes it first.  A maxCharLen of
  * zero means no output buffer.  Reports the number of UniChars produced through
  * *usedCharLen and returns the number of input bytes consumed.
  * NOTE(review): several lines (declarations of usedLen/decompedLen, the counter
  * updates and closing braces) are missing from this excerpt. */
 360 static CFIndex 
__CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 361     CFIndex processedByteLen 
= 0; 
 /* Receives the UniChars produced by the definition's callback for one input byte. */
 362     UniChar charBuffer
[__CFMaximumConvertedLength
]; 
 /* Receives the decomposition of one UniChar from charBuffer. */
 363     UTF32Char decompBuffer
[MAX_DECOMPOSED_LENGTH
]; 
 366     CFIndex idx
, decompIndex
; 
 367     bool isHFSPlus 
= (flags 
& kCFStringEncodingUseHFSPlusCanonical 
? true : false); 
 368     CFIndex theUsedCharLen 
= 0; 
 370     while ((processedByteLen 
< numBytes
) && (!maxCharLen 
|| (theUsedCharLen 
< maxCharLen
))) { 
 /* NOTE(review): the callback is invoked through the CheapEightBit proc type although
  * this is the StandardEightBit wrapper and the result is used as a length; confirm
  * the two proc types are call-compatible. */
 371         if (!(usedLen 
= ((CFStringEncodingCheapEightBitToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
[processedByteLen
], charBuffer
))) break; 
 373         for (idx 
= 0;idx 
< usedLen
;idx
++) { 
 374             if (CFUniCharIsDecomposableCharacter(charBuffer
[idx
], isHFSPlus
)) { 
 375                 decompedLen 
= CFUniCharDecomposeCharacter(charBuffer
[idx
], decompBuffer
, MAX_DECOMPOSED_LENGTH
); 
 /* Publish the count reached so far, so it is already in place for the early returns
  * below. */
 376                 *usedCharLen 
= theUsedCharLen
; 
 378                 for (decompIndex 
= 0;decompIndex 
< decompedLen
;decompIndex
++) { 
 379                     if (decompBuffer
[decompIndex
] > 0xFFFF) { // Non-BMP 
 /* A non-BMP code point needs two UniChars (a surrogate pair). */
 380                         if (theUsedCharLen 
+ 2 > maxCharLen
)  return processedByteLen
; 
 /* NOTE(review): the range test above reads decompBuffer[decompIndex], but the stores
  * in this loop read and modify charBuffer[idx] (the undecomposed UniChar).  In the
  * visible lines the decomposed code points are never written to the output; the
  * CheapEightBit canonical wrapper in this file writes the elements of its
  * decomposition buffer instead.  Confirm whether decompBuffer[decompIndex] was
  * intended here. */
 383                             charBuffer
[idx
] = charBuffer
[idx
] - 0x10000; 
 384                             *(characters
++) = (charBuffer
[idx
] >> 10) + 0xD800UL
; 
 385                             *(characters
++) = (charBuffer
[idx
] & 0x3FF) + 0xDC00UL
; 
 388                         if (theUsedCharLen 
+ 1 > maxCharLen
)  return processedByteLen
; 
 390                         *(characters
++) = charBuffer
[idx
]; 
 /* Not decomposable: copy the converted UniChar through when there is an output
  * buffer. */
 394                 if (maxCharLen
) *(characters
++) = charBuffer
[idx
]; 
 401     *usedCharLen 
= theUsedCharLen
; 
 402     return processedByteLen
; 
 405 static CFIndex 
__CFToBytesCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const UniChar 
*characters
, CFIndex numChars
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex 
*usedByteLen
) { 
 406     CFIndex processedCharLen 
= 0; 
 407     uint8_t byteBuffer
[__CFMaximumConvertedLength
]; 
 412     while ((processedCharLen 
< numChars
) && (!maxByteLen 
|| (*usedByteLen 
< maxByteLen
))) { 
 413         if (!(usedLen 
= ((CFStringEncodingCheapMultiByteToBytesProc
)((const _CFEncodingConverter
*)converter
)->definition
->toBytes
)(flags
, characters
[processedCharLen
], byteBuffer
))) break; 
 418             if (*usedByteLen 
+ usedLen 
> maxByteLen
) break; 
 420             for (idx 
= 0;idx 
<usedLen
;idx
++) { 
 421                 bytes
[*usedByteLen 
+ idx
] = byteBuffer
[idx
]; 
 425         *usedByteLen 
+= usedLen
; 
 429     return processedCharLen
; 
 432 static CFIndex 
__CFToUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 433     CFIndex processedByteLen 
= 0; 
 439     while (numBytes 
&& (!maxCharLen 
|| (*usedCharLen 
< maxCharLen
))) { 
 440         if (!(usedLen 
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break; 
 442         if (maxCharLen
) *(characters
++) = character
; 
 444         processedByteLen 
+= usedLen
; 
 449     return processedByteLen
; 
 452 static CFIndex 
__CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 453     CFIndex processedByteLen 
= 0; 
 454     UTF32Char charBuffer
[MAX_DECOMPOSED_LENGTH
]; 
 457     CFIndex decomposedLen
; 
 458     CFIndex theUsedCharLen 
= 0; 
 459     bool isHFSPlus 
= (flags 
& kCFStringEncodingUseHFSPlusCanonical 
? true : false); 
 461     while (numBytes 
&& (!maxCharLen 
|| (theUsedCharLen 
< maxCharLen
))) { 
 462         if (!(usedLen 
= ((CFStringEncodingCheapMultiByteToUnicodeProc
)((const _CFEncodingConverter
*)converter
)->definition
->toUnicode
)(flags
, bytes
, numBytes
, &character
))) break; 
 464         if (CFUniCharIsDecomposableCharacter(character
, isHFSPlus
)) { 
 467             decomposedLen 
= CFUniCharDecomposeCharacter(character
, charBuffer
, MAX_DECOMPOSED_LENGTH
); 
 468             *usedCharLen 
= theUsedCharLen
; 
 470             for (idx 
= 0;idx 
< decomposedLen
;idx
++) { 
 471                 if (charBuffer
[idx
] > 0xFFFF) { // Non-BMP 
 472                     if (theUsedCharLen 
+ 2 > maxCharLen
)  return processedByteLen
; 
 475                         charBuffer
[idx
] = charBuffer
[idx
] - 0x10000; 
 476                         *(characters
++) = (UniChar
)(charBuffer
[idx
] >> 10) + 0xD800UL
; 
 477                         *(characters
++) = (UniChar
)(charBuffer
[idx
] & 0x3FF) + 0xDC00UL
; 
 480                     if (theUsedCharLen 
+ 1 > maxCharLen
)  return processedByteLen
; 
 482                     *(characters
++) = charBuffer
[idx
]; 
 486             if (maxCharLen
) *(characters
++) = character
; 
 490         processedByteLen 
+= usedLen
; 
 494     *usedCharLen 
= theUsedCharLen
; 
 495     return processedByteLen
; 
 500 CF_INLINE _CFEncodingConverter 
*__CFEncodingConverterFromDefinition(const CFStringEncodingConverter 
*definition
, CFStringEncoding encoding
) { 
 501 #define NUM_OF_ENTRIES_CYCLE (10) 
 502     static uint32_t _currentIndex 
= 0; 
 503     static uint32_t _allocatedSize 
= 0; 
 504     static _CFEncodingConverter 
*_allocatedEntries 
= NULL
; 
 505     _CFEncodingConverter 
*converter
; 
 508     if ((_currentIndex 
+ 1) >= _allocatedSize
) { 
 511         _allocatedEntries 
= NULL
; 
 513     if (_allocatedEntries 
== NULL
) { // Not allocated yet 
 514         _allocatedEntries 
= (_CFEncodingConverter 
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(_CFEncodingConverter
) * NUM_OF_ENTRIES_CYCLE
, 0); 
 515         _allocatedSize 
= NUM_OF_ENTRIES_CYCLE
; 
 516         converter 
= &(_allocatedEntries
[_currentIndex
]); 
 518         converter 
= &(_allocatedEntries
[++_currentIndex
]); 
 521     memset(converter
, 0, sizeof(_CFEncodingConverter
)); 
 523     converter
->definition 
= definition
; 
 525     switch (definition
->encodingClass
) { 
 526         case kCFStringEncodingConverterStandard
: 
 527             converter
->toBytes 
= NULL
; 
 528             converter
->toUnicode 
= NULL
; 
 529             converter
->toCanonicalUnicode 
= NULL
; 
 532         case kCFStringEncodingConverterCheapEightBit
: 
 533             converter
->toBytes 
= __CFToBytesCheapEightBitWrapper
; 
 534             converter
->toUnicode 
= __CFToUnicodeCheapEightBitWrapper
; 
 535             converter
->toCanonicalUnicode 
= __CFToCanonicalUnicodeCheapEightBitWrapper
; 
 538         case kCFStringEncodingConverterStandardEightBit
: 
 539             converter
->toBytes 
= __CFToBytesStandardEightBitWrapper
; 
 540             converter
->toUnicode 
= __CFToUnicodeStandardEightBitWrapper
; 
 541             converter
->toCanonicalUnicode 
= __CFToCanonicalUnicodeStandardEightBitWrapper
; 
 544         case kCFStringEncodingConverterCheapMultiByte
: 
 545             converter
->toBytes 
= __CFToBytesCheapMultiByteWrapper
; 
 546             converter
->toUnicode 
= __CFToUnicodeCheapMultiByteWrapper
; 
 547             converter
->toCanonicalUnicode 
= __CFToCanonicalUnicodeCheapMultiByteWrapper
; 
 550 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 
 551         case kCFStringEncodingConverterICU
: 
 552             converter
->toBytes 
= (_CFToBytesProc
)__CFStringEncodingGetICUName(encoding
); 
 556         case kCFStringEncodingConverterPlatformSpecific
: 
 559         default: // Shouldn't be here 
 563     converter
->toBytesFallback 
= (definition
->toBytesFallback 
? definition
->toBytesFallback 
: __CFDefaultToBytesFallbackProc
); 
 564     converter
->toUnicodeFallback 
= (definition
->toUnicodeFallback 
? definition
->toUnicodeFallback 
: __CFDefaultToUnicodeFallbackProc
); 
 /* Maps a CFStringEncoding to its converter definition.  The encodings listed in the
  * cases below return the address of the corresponding built-in definition
  * (__CFConverterUTF8, __CFConverterMacRoman, __CFConverterWinLatin1,
  * __CFConverterASCII, __CFConverterISOLatin1, __CFConverterNextStepLatin); the last
  * visible return defers to __CFStringEncodingGetExternalConverter(encoding).
  * NOTE(review): the switch header, the label of the final return and some lines
  * between the cases are missing from this excerpt. */
 569 CF_INLINE 
const CFStringEncodingConverter 
*__CFStringEncodingConverterGetDefinition(CFStringEncoding encoding
) { 
 571         case kCFStringEncodingUTF8
: 
 572             return &__CFConverterUTF8
; 
 574         case kCFStringEncodingMacRoman
: 
 575             return &__CFConverterMacRoman
; 
 577         case kCFStringEncodingWindowsLatin1
: 
 578             return &__CFConverterWinLatin1
; 
 580         case kCFStringEncodingASCII
: 
 581             return &__CFConverterASCII
; 
 583         case kCFStringEncodingISOLatin1
: 
 584             return &__CFConverterISOLatin1
; 
 587         case kCFStringEncodingNextStepLatin
: 
 588             return &__CFConverterNextStepLatin
; 
 592             return __CFStringEncodingGetExternalConverter(encoding
); 
 596 static const _CFEncodingConverter 
*__CFGetConverter(uint32_t encoding
) { 
 597     const _CFEncodingConverter 
*converter 
= NULL
; 
 598     const _CFEncodingConverter 
**commonConverterSlot 
= NULL
; 
 599     static _CFEncodingConverter 
*commonConverters
[3] = {NULL
, NULL
, NULL
}; // UTF8, MacRoman/WinLatin1, and the default encoding* 
 600     static CFMutableDictionaryRef mappingTable 
= NULL
; 
 601     static CFSpinLock_t lock 
= CFSpinLockInit
; 
 604         case kCFStringEncodingUTF8
: commonConverterSlot 
= (const _CFEncodingConverter 
**)&(commonConverters
[0]); break; 
 606             /* the switch here should avoid possible bootstrap issues in the default: case below when invoked from CFStringGetSystemEncoding() */ 
 607 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX 
 608         case kCFStringEncodingMacRoman
: commonConverterSlot 
= (const _CFEncodingConverter 
**)&(commonConverters
[1]); break; 
 609 #elif DEPLOYMENT_TARGET_WINDOWS 
 610         case kCFStringEncodingWindowsLatin1
: commonConverterSlot 
= (const _CFEncodingConverter 
**)(&(commonConverters
[1])); break; 
 612 #warning This case must match __defaultEncoding value defined in CFString.c 
 613         case kCFStringEncodingISOLatin1
: commonConverterSlot 
= (const _CFEncodingConverter 
**)(&(commonConverters
[1])); break; 
 616         default: if (CFStringGetSystemEncoding() == encoding
) commonConverterSlot 
= (const _CFEncodingConverter 
**)&(commonConverters
[2]); break; 
 620     converter 
= ((NULL 
== commonConverterSlot
) ? ((NULL 
== mappingTable
) ? NULL 
: (const _CFEncodingConverter 
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
); 
 621     __CFSpinUnlock(&lock
); 
 623     if (NULL 
== converter
) { 
 624         const CFStringEncodingConverter 
*definition 
= __CFStringEncodingConverterGetDefinition(encoding
); 
 626         if (NULL 
!= definition
) { 
 628             converter 
= ((NULL 
== commonConverterSlot
) ? ((NULL 
== mappingTable
) ? NULL 
: (const _CFEncodingConverter 
*)CFDictionaryGetValue(mappingTable
, (const void *)(uintptr_t)encoding
)) : *commonConverterSlot
); 
 630             if (NULL 
== converter
) { 
 631                 converter 
= __CFEncodingConverterFromDefinition(definition
, encoding
); 
 633                 if (NULL 
== commonConverterSlot
) { 
 634                     if (NULL 
== mappingTable
) mappingTable 
= CFDictionaryCreateMutable(NULL
, 0, NULL
, NULL
); 
 636                     CFDictionarySetValue(mappingTable
, (const void *)(uintptr_t)encoding
, converter
); 
 638                     *commonConverterSlot 
= converter
; 
 641             __CFSpinUnlock(&lock
); 
 650 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding
, uint32_t flags
, const UniChar 
*characters
, CFIndex numChars
, CFIndex 
*usedCharLen
, uint8_t *bytes
, CFIndex maxByteLen
, CFIndex 
*usedByteLen
) { 
 651     if (encoding 
== kCFStringEncodingUTF8
) { 
 652         static CFStringEncodingToBytesProc __CFToUTF8 
= NULL
; 
 653         CFIndex convertedCharLen
; 
 657         if ((flags 
& kCFStringEncodingUseCanonical
) || (flags 
& kCFStringEncodingUseHFSPlusCanonical
)) { 
 658             (void)CFUniCharDecompose(characters
, numChars
, &convertedCharLen
, (void *)bytes
, maxByteLen
, &usedLen
, true, kCFUniCharUTF8Format
, (flags 
& kCFStringEncodingUseHFSPlusCanonical 
? true : false)); 
 661                 const CFStringEncodingConverter 
*utf8Converter 
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
); 
 662                 __CFToUTF8 
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
; 
 664             convertedCharLen 
= __CFToUTF8(0, characters
, numChars
, bytes
, maxByteLen
, &usedLen
); 
 666         if (usedCharLen
) *usedCharLen 
= convertedCharLen
; 
 667         if (usedByteLen
) *usedByteLen 
= usedLen
; 
 669         if (convertedCharLen 
== numChars
) { 
 670             return kCFStringEncodingConversionSuccess
; 
 671         } else if ((maxByteLen 
> 0) && ((maxByteLen 
- usedLen
) < 10)) { // could be filled outbuf 
 672             UTF16Char character 
= characters
[convertedCharLen
]; 
 674             if (((character 
>= kSurrogateLowStart
) && (character 
<= kSurrogateLowEnd
)) || ((character 
>= kSurrogateHighStart
) && (character 
<= kSurrogateHighEnd
) && ((1 == (numChars 
- convertedCharLen
)) || (characters
[convertedCharLen 
+ 1] < kSurrogateLowStart
) || (characters
[convertedCharLen 
+ 1] > kSurrogateLowEnd
)))) return kCFStringEncodingInvalidInputStream
; 
 676             return kCFStringEncodingInsufficientOutputBufferLength
; 
 678             return kCFStringEncodingInvalidInputStream
; 
 681         const _CFEncodingConverter 
*converter 
= __CFGetConverter(encoding
); 
 683         CFIndex localUsedByteLen
; 
 684         CFIndex theUsedByteLen 
= 0; 
 685         uint32_t theResult 
= kCFStringEncodingConversionSuccess
; 
 686         CFStringEncodingToBytesPrecomposeProc toBytesPrecompose 
= NULL
; 
 687         CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar 
= NULL
; 
 689         if (!converter
) return kCFStringEncodingConverterUnavailable
; 
 691         if (flags 
& kCFStringEncodingSubstituteCombinings
) { 
 692             if (!(flags 
& kCFStringEncodingAllowLossyConversion
)) isValidCombiningChar 
= converter
->definition
->isValidCombiningChar
; 
 694             isValidCombiningChar 
= converter
->definition
->isValidCombiningChar
; 
 695             if (!(flags 
& kCFStringEncodingIgnoreCombinings
)) { 
 696                 toBytesPrecompose 
= converter
->definition
->toBytesPrecompose
; 
 697                 flags 
|= kCFStringEncodingComposeCombinings
; 
 701 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 
 702         if (kCFStringEncodingConverterICU 
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToBytes((const char *)converter
->toBytes
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
); 
 705         /* Platform converter */ 
 706         if (kCFStringEncodingConverterPlatformSpecific 
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformUnicodeToBytes(encoding
, flags
, characters
, numChars
, usedCharLen
, bytes
, maxByteLen
, usedByteLen
); 
 708         while ((usedLen 
< numChars
) && (!maxByteLen 
|| (theUsedByteLen 
< maxByteLen
))) { 
 709             if ((usedLen 
+= TO_BYTE(converter
, flags
, characters 
+ usedLen
, numChars 
- usedLen
, bytes 
+ theUsedByteLen
, (maxByteLen 
? maxByteLen 
- theUsedByteLen 
: 0), &localUsedByteLen
)) < numChars
) { 
 712                 if (isValidCombiningChar 
&& (usedLen 
> 0) && isValidCombiningChar(characters
[usedLen
])) { 
 713                     if (toBytesPrecompose
) { 
 714                         CFIndex localUsedLen 
= usedLen
; 
 716                         while (isValidCombiningChar(characters
[--usedLen
])); 
 717                         theUsedByteLen 
+= localUsedByteLen
; 
 718                         if (converter
->definition
->maxBytesPerChar 
> 1) { 
 719                             TO_BYTE(converter
, flags
, characters 
+ usedLen
, localUsedLen 
- usedLen
, NULL
, 0, &localUsedByteLen
); 
 720                             theUsedByteLen 
-= localUsedByteLen
; 
 724                         if ((localUsedLen 
= toBytesPrecompose(flags
, characters 
+ usedLen
, numChars 
- usedLen
, bytes 
+ theUsedByteLen
, (maxByteLen 
? maxByteLen 
- theUsedByteLen 
: 0), &localUsedByteLen
)) > 0) { 
 725                             usedLen 
+= localUsedLen
; 
 726                             if ((usedLen 
< numChars
) && isValidCombiningChar(characters
[usedLen
])) { // There is a non-base char not combined remaining 
 727                                 theUsedByteLen 
+= localUsedByteLen
; 
 728                                 theResult 
= kCFStringEncodingInvalidInputStream
; 
 731                         } else if (flags 
& kCFStringEncodingAllowLossyConversion
) { 
 732                             uint8_t lossyByte 
= CFStringEncodingMaskToLossyByte(flags
); 
 735                                 while (isValidCombiningChar(characters
[++usedLen
])); 
 736                                 localUsedByteLen 
= 1; 
 737                                 if (maxByteLen
) *(bytes 
+ theUsedByteLen
) = lossyByte
; 
 740                                 usedLen 
+= TO_BYTE_FALLBACK(converter
, characters 
+ usedLen
, numChars 
- usedLen
, bytes 
+ theUsedByteLen
, (maxByteLen 
? maxByteLen 
- theUsedByteLen 
: 0), &localUsedByteLen
); 
 743                             theResult 
= kCFStringEncodingInvalidInputStream
; 
 746                     } else if (maxByteLen 
&& ((maxByteLen 
== theUsedByteLen 
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters 
+ usedLen
, numChars 
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up 
 747                                     theUsedByteLen 
+= localUsedByteLen
; 
 748                                     theResult 
= kCFStringEncodingInsufficientOutputBufferLength
; 
 750                     } else if (flags 
& kCFStringEncodingIgnoreCombinings
) { 
 751                         while ((++usedLen 
< numChars
) && isValidCombiningChar(characters
[usedLen
])); 
 753                         uint8_t lossyByte 
= CFStringEncodingMaskToLossyByte(flags
); 
 755                         theUsedByteLen 
+= localUsedByteLen
; 
 758                             localUsedByteLen 
= 1; 
 759                             if (maxByteLen
) *(bytes 
+ theUsedByteLen
) = lossyByte
; 
 761                             usedLen 
+= TO_BYTE_FALLBACK(converter
, characters 
+ usedLen
, numChars 
- usedLen
, bytes 
+ theUsedByteLen
, (maxByteLen 
? maxByteLen 
- theUsedByteLen 
: 0), &localUsedByteLen
); 
 764                 } else if (maxByteLen 
&& ((maxByteLen 
== theUsedByteLen 
+ localUsedByteLen
) || TO_BYTE(converter
, flags
, characters 
+ usedLen
, numChars 
- usedLen
, NULL
, 0, &dummy
))) { // buffer was filled up 
 765                     theUsedByteLen 
+= localUsedByteLen
; 
 767                     if (flags 
& kCFStringEncodingAllowLossyConversion 
&& !CFStringEncodingMaskToLossyByte(flags
)) { 
 768                         CFIndex localUsedLen
; 
 770                         localUsedByteLen 
= 0; 
 771                         while ((usedLen 
< numChars
) && !localUsedByteLen 
&& (localUsedLen 
= TO_BYTE_FALLBACK(converter
, characters 
+ usedLen
, numChars 
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen 
+= localUsedLen
; 
 773                     if (usedLen 
< numChars
) theResult 
= kCFStringEncodingInsufficientOutputBufferLength
; 
 775                 } else if (flags 
& kCFStringEncodingAllowLossyConversion
) { 
 776                     uint8_t lossyByte 
= CFStringEncodingMaskToLossyByte(flags
); 
 778                     theUsedByteLen 
+= localUsedByteLen
; 
 781                         localUsedByteLen 
= 1; 
 782                         if (maxByteLen
) *(bytes 
+ theUsedByteLen
) = lossyByte
; 
 784                         usedLen 
+= TO_BYTE_FALLBACK(converter
, characters 
+ usedLen
, numChars 
- usedLen
, bytes 
+ theUsedByteLen
, (maxByteLen 
? maxByteLen 
- theUsedByteLen 
: 0), &localUsedByteLen
); 
 787                     theUsedByteLen 
+= localUsedByteLen
; 
 788                     theResult 
= kCFStringEncodingInvalidInputStream
; 
 792             theUsedByteLen 
+= localUsedByteLen
; 
 795         if (usedLen 
< numChars 
&& maxByteLen 
&& theResult 
== kCFStringEncodingConversionSuccess
) { 
 796             if (flags 
& kCFStringEncodingAllowLossyConversion 
&& !CFStringEncodingMaskToLossyByte(flags
)) { 
 797                 CFIndex localUsedLen
; 
 799                 localUsedByteLen 
= 0; 
 800                 while ((usedLen 
< numChars
) && !localUsedByteLen 
&& (localUsedLen 
= TO_BYTE_FALLBACK(converter
, characters 
+ usedLen
, numChars 
- usedLen
, NULL
, 0, &localUsedByteLen
))) usedLen 
+= localUsedLen
; 
 802             if (usedLen 
< numChars
) theResult 
= kCFStringEncodingInsufficientOutputBufferLength
; 
 804         if (usedByteLen
) *usedByteLen 
= theUsedByteLen
; 
 805         if (usedCharLen
) *usedCharLen 
= usedLen
; 
 811 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
, CFIndex 
*usedByteLen
, UniChar 
*characters
, CFIndex maxCharLen
, CFIndex 
*usedCharLen
) { 
 812     const _CFEncodingConverter 
*converter 
= __CFGetConverter(encoding
); 
 814     CFIndex theUsedCharLen 
= 0; 
 815     CFIndex localUsedCharLen
; 
 816     uint32_t theResult 
= kCFStringEncodingConversionSuccess
; 
 818     if (!converter
) return kCFStringEncodingConverterUnavailable
; 
 820 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 
 821     if (kCFStringEncodingConverterICU 
== converter
->definition
->encodingClass
) return __CFStringEncodingICUToUnicode((const char *)converter
->toBytes
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
); 
 824     /* Platform converter */ 
 825     if (kCFStringEncodingConverterPlatformSpecific 
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformBytesToUnicode(encoding
, flags
, bytes
, numBytes
, usedByteLen
, characters
, maxCharLen
, usedCharLen
); 
 827     while ((usedLen 
< numBytes
) && (!maxCharLen 
|| (theUsedCharLen 
< maxCharLen
))) { 
 828         if ((usedLen 
+= TO_UNICODE(converter
, flags
, bytes 
+ usedLen
, numBytes 
- usedLen
, characters 
+ theUsedCharLen
, (maxCharLen 
? maxCharLen 
- theUsedCharLen 
: 0), &localUsedCharLen
)) < numBytes
) { 
 829             CFIndex tempUsedCharLen
; 
 831             if (maxCharLen 
&& ((maxCharLen 
== theUsedCharLen 
+ localUsedCharLen
) || (((flags 
& (kCFStringEncodingUseCanonical
|kCFStringEncodingUseHFSPlusCanonical
)) || (maxCharLen 
== theUsedCharLen 
+ localUsedCharLen 
+ 1)) && TO_UNICODE(converter
, flags
, bytes 
+ usedLen
, numBytes 
- usedLen
, NULL
, 0, &tempUsedCharLen
)))) { // buffer was filled up 
 832                 theUsedCharLen 
+= localUsedCharLen
; 
 833                 theResult 
= kCFStringEncodingInsufficientOutputBufferLength
; 
 835             } else if (flags 
& kCFStringEncodingAllowLossyConversion
) { 
 836                 theUsedCharLen 
+= localUsedCharLen
; 
 837                 usedLen 
+= TO_UNICODE_FALLBACK(converter
, bytes 
+ usedLen
, numBytes 
- usedLen
, characters 
+ theUsedCharLen
, (maxCharLen 
? maxCharLen 
- theUsedCharLen 
: 0), &localUsedCharLen
); 
 839                 theUsedCharLen 
+= localUsedCharLen
; 
 840                 theResult 
= kCFStringEncodingInvalidInputStream
; 
 844         theUsedCharLen 
+= localUsedCharLen
; 
 847     if (usedLen 
< numBytes 
&& maxCharLen 
&& theResult 
== kCFStringEncodingConversionSuccess
) { 
 848         theResult 
= kCFStringEncodingInsufficientOutputBufferLength
; 
 850     if (usedCharLen
) *usedCharLen 
= theUsedCharLen
; 
 851     if (usedByteLen
) *usedByteLen 
= usedLen
; 
 856 CF_PRIVATE 
bool CFStringEncodingIsValidEncoding(uint32_t encoding
) { 
 857     return (CFStringEncodingGetConverter(encoding
) ? true : false); 
 860 CF_PRIVATE CFIndex 
CFStringEncodingCharLengthForBytes(uint32_t encoding
, uint32_t flags
, const uint8_t *bytes
, CFIndex numBytes
) { 
 861     const _CFEncodingConverter 
*converter 
= __CFGetConverter(encoding
); 
 864 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 
 865         if (kCFStringEncodingConverterICU 
== converter
->definition
->encodingClass
) return __CFStringEncodingICUCharLength((const char *)converter
->toBytes
, flags
, bytes
, numBytes
); 
 868         if (kCFStringEncodingConverterPlatformSpecific 
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformCharLengthForBytes(encoding
, flags
, bytes
, numBytes
); 
 870         if (1 == converter
->definition
->maxBytesPerChar
) return numBytes
; 
 872         if (NULL 
== converter
->definition
->toUnicodeLen
) { 
 873             CFIndex usedByteLen 
= 0; 
 874             CFIndex totalLength 
= 0; 
 877             while (numBytes 
> 0) { 
 878                 usedByteLen 
= TO_UNICODE(converter
, flags
, bytes
, numBytes
, NULL
, 0, &usedCharLen
); 
 880                 bytes 
+= usedByteLen
; 
 881                 numBytes 
-= usedByteLen
; 
 882                 totalLength 
+= usedCharLen
; 
 885                     if (0 == (flags 
& kCFStringEncodingAllowLossyConversion
)) return 0; 
 887                     usedByteLen 
= TO_UNICODE_FALLBACK(converter
, bytes
, numBytes
, NULL
, 0, &usedCharLen
); 
 889                     bytes 
+= usedByteLen
; 
 890                     numBytes 
-= usedByteLen
; 
 891                     totalLength 
+= usedCharLen
; 
 897             return converter
->definition
->toUnicodeLen(flags
, bytes
, numBytes
); 
 904 CF_PRIVATE CFIndex 
CFStringEncodingByteLengthForCharacters(uint32_t encoding
, uint32_t flags
, const UniChar 
*characters
, CFIndex numChars
) { 
 905     const _CFEncodingConverter 
*converter 
= __CFGetConverter(encoding
); 
 908 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 
 909         if (kCFStringEncodingConverterICU 
== converter
->definition
->encodingClass
) return __CFStringEncodingICUByteLength((const char *)converter
->toBytes
, flags
, characters
, numChars
); 
 912         if (kCFStringEncodingConverterPlatformSpecific 
== converter
->definition
->encodingClass
) return __CFStringEncodingPlatformByteLengthForCharacters(encoding
, flags
, characters
, numChars
); 
 914         if (1 == converter
->definition
->maxBytesPerChar
) return numChars
; 
 916         if (NULL 
== converter
->definition
->toBytesLen
) { 
 919             return ((kCFStringEncodingConversionSuccess 
== CFStringEncodingUnicodeToBytes(encoding
, flags
, characters
, numChars
, NULL
, NULL
, 0, &usedByteLen
)) ? usedByteLen 
: 0); 
 921             return converter
->definition
->toBytesLen(flags
, characters
, numChars
); 
 928 void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding
, CFStringEncodingToBytesFallbackProc toBytes
, CFStringEncodingToUnicodeFallbackProc toUnicode
) { 
 929     _CFEncodingConverter 
*converter 
= (_CFEncodingConverter 
*)__CFGetConverter(encoding
); 
 931     if (NULL 
!= converter
) { 
 932        const CFStringEncodingConverter 
*body 
= CFStringEncodingGetConverter(encoding
); 
 934         converter
->toBytesFallback 
= ((NULL 
== toBytes
) ? ((NULL 
== body
) ? __CFDefaultToBytesFallbackProc 
: body
->toBytesFallback
) : toBytes
); 
 935         converter
->toUnicodeFallback 
= ((NULL 
== toUnicode
) ? ((NULL 
== body
) ? __CFDefaultToUnicodeFallbackProc 
: body
->toUnicodeFallback
) : toUnicode
); 
 939 CF_PRIVATE 
const CFStringEncodingConverter 
*CFStringEncodingGetConverter(uint32_t encoding
) { 
 940     const _CFEncodingConverter 
*converter 
= __CFGetConverter(encoding
); 
 942     return ((NULL 
== converter
) ? NULL 
: converter
->definition
); 
 945 static const CFStringEncoding __CFBuiltinEncodings
[] = { 
 946     kCFStringEncodingMacRoman
, 
 947     kCFStringEncodingWindowsLatin1
, 
 948     kCFStringEncodingISOLatin1
, 
 949     kCFStringEncodingNextStepLatin
, 
 950     kCFStringEncodingASCII
, 
 951     kCFStringEncodingUTF8
, 
 952     /* These seven are available only in CFString-level */ 
 953     kCFStringEncodingNonLossyASCII
, 
 955     kCFStringEncodingUTF16
, 
 956     kCFStringEncodingUTF16BE
, 
 957     kCFStringEncodingUTF16LE
, 
 959     kCFStringEncodingUTF32
, 
 960     kCFStringEncodingUTF32BE
, 
 961     kCFStringEncodingUTF32LE
, 
 963     kCFStringEncodingInvalidId
, 
 966 static CFComparisonResult 
__CFStringEncodingComparator(const void *v1
, const void *v2
, void *context
) { 
 967     CFComparisonResult val1 
= (*(const CFStringEncoding 
*)v1
) & 0xFFFF; 
 968     CFComparisonResult val2 
= (*(const CFStringEncoding 
*)v2
) & 0xFFFF; 
 970     return ((val1 
== val2
) ? ((CFComparisonResult
)(*(const CFStringEncoding 
*)v1
) - (CFComparisonResult
)(*(const CFStringEncoding 
*)v2
)) : val1 
- val2
); 
 973 static void __CFStringEncodingFliterDupes(CFStringEncoding 
*encodings
, CFIndex numSlots
) { 
 974     CFStringEncoding last 
= kCFStringEncodingInvalidId
; 
 975     const CFStringEncoding 
*limitEncodings 
= encodings 
+ numSlots
; 
 977     while (encodings 
< limitEncodings
) { 
 978         if (last 
== *encodings
) { 
 979             if ((encodings 
+ 1) < limitEncodings
) memmove(encodings
, encodings 
+ 1, sizeof(CFStringEncoding
) * (limitEncodings 
- encodings 
- 1)); 
 982             last 
= *(encodings
++); 
 987 CF_PRIVATE 
const CFStringEncoding 
*CFStringEncodingListOfAvailableEncodings(void) { 
 988     static const CFStringEncoding 
*encodings 
= NULL
; 
 990     if (NULL 
== encodings
) { 
 991         CFStringEncoding 
*list 
= (CFStringEncoding 
*)__CFBuiltinEncodings
; 
 992         CFIndex numICUConverters 
= 0, numPlatformConverters 
= 0; 
 993 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 
 994         CFStringEncoding 
*icuConverters 
= __CFStringEncodingCreateICUEncodings(NULL
, &numICUConverters
); 
 996         CFStringEncoding 
*icuConverters 
= NULL
; 
 998         CFStringEncoding 
*platformConverters 
= __CFStringEncodingCreateListOfAvailablePlatformConverters(NULL
, &numPlatformConverters
); 
1000         if ((NULL 
!= icuConverters
) || (NULL 
!= platformConverters
)) { 
1001             CFIndex numSlots 
= (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters 
+ numPlatformConverters
; 
1003             list 
= (CFStringEncoding 
*)CFAllocatorAllocate(NULL
, sizeof(CFStringEncoding
) * numSlots
, 0); 
1005             memcpy(list
, __CFBuiltinEncodings
, sizeof(__CFBuiltinEncodings
)); 
1007             if (NULL 
!= icuConverters
) { 
1008                 memcpy(list 
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)), icuConverters
, sizeof(CFStringEncoding
) * numICUConverters
); 
1009                 CFAllocatorDeallocate(NULL
, icuConverters
); 
1012             if (NULL 
!= platformConverters
) { 
1013                 memcpy(list 
+ (sizeof(__CFBuiltinEncodings
) / sizeof(*__CFBuiltinEncodings
)) + numICUConverters
, platformConverters
, sizeof(CFStringEncoding
) * numPlatformConverters
); 
1014                 CFAllocatorDeallocate(NULL
, platformConverters
); 
1017             CFQSortArray(list
, numSlots
, sizeof(CFStringEncoding
), (CFComparatorFunction
)__CFStringEncodingComparator
, NULL
); 
1018             __CFStringEncodingFliterDupes(list
, numSlots
); 
1020         if (!OSAtomicCompareAndSwapPtrBarrier(NULL
, list
, (void * volatile *)&encodings
) && (list 
!= __CFBuiltinEncodings
)) CFAllocatorDeallocate(NULL
, list
); 
1029 #undef kSurrogateHighStart 
1030 #undef kSurrogateHighEnd 
1031 #undef kSurrogateLowStart 
1032 #undef kSurrogateLowEnd 
1033 #undef TO_BYTE_FALLBACK 
1034 #undef TO_UNICODE_FALLBACK 
1036 #undef NUM_OF_ENTRIES_CYCLE