CFStringEncodingConverter.c

   1 /*
   2  * Copyright (c) 2014 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 /*      CFStringEncodingConverter.c
  25         Copyright (c) 1998-2013, Apple Inc. All rights reserved.
  26         Responsibility: Aki Inoue
  27 */
  28
  29 #include "CFInternal.h"
  30 #include <CoreFoundation/CFArray.h>
  31 #include <CoreFoundation/CFDictionary.h>
  32 #include "CFICUConverters.h"
  33 #include <CoreFoundation/CFUniChar.h>
  34 #include <CoreFoundation/CFPriv.h>
  35 #include "CFUnicodeDecomposition.h"
  36 #include "CFStringEncodingConverterExt.h"
  37 #include "CFStringEncodingConverterPriv.h"
  38 #include <stdlib.h>
  39
  40 typedef CFIndex (*_CFToBytesProc)(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen);
  41 typedef CFIndex (*_CFToUnicodeProc)(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen);
  42
  43 typedef struct {
  44     const CFStringEncodingConverter *definition;
  45     _CFToBytesProc toBytes;
  46     _CFToUnicodeProc toUnicode;
  47     _CFToUnicodeProc toCanonicalUnicode;
  48     CFStringEncodingToBytesFallbackProc toBytesFallback;
  49     CFStringEncodingToUnicodeFallbackProc toUnicodeFallback;
  50 } _CFEncodingConverter;
  51
  52 /* Macros
  53 */
  54 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->definition->toBytes)(flags,chars,numChars,bytes,max,used))
  55 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->toUnicode ?  (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->definition->toUnicode)(flags,bytes,numBytes,chars,max,used))
  56
  57 #define ASCIINewLine 0x0a
  58 #define kSurrogateHighStart 0xD800
  59 #define kSurrogateHighEnd 0xDBFF
  60 #define kSurrogateLowStart 0xDC00
  61 #define kSurrogateLowEnd 0xDFFF
  62
  63 static const uint8_t __CFMaximumConvertedLength = 20;
  64
  65 /* Mapping 128..255 to lossy ASCII
  66 */
  67 static const struct {
  68     unsigned char chars[4];
  69 } _toLossyASCIITable[] = {
  70     {{' ', 0, 0, 0}}, // NO-BREAK SPACE
  71     {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
  72     {{'c', 0, 0, 0}}, // CENT SIGN
  73     {{'L', 0, 0, 0}}, // POUND SIGN
  74     {{'$', 0, 0, 0}}, // CURRENCY SIGN
  75     {{'Y', 0, 0, 0}}, // YEN SIGN
  76     {{'|', 0, 0, 0}}, // BROKEN BAR
  77     {{0, 0, 0, 0}}, // SECTION SIGN
  78     {{0, 0, 0, 0}}, // DIAERESIS
  79     {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
  80     {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
  81     {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
  82     {{0, 0, 0, 0}}, // NOT SIGN
  83     {{'-', 0, 0, 0}}, // SOFT HYPHEN
  84     {{'(', 'R', ')', 0}}, // REGISTERED SIGN
  85     {{0, 0, 0, 0}}, // MACRON
  86     {{0, 0, 0, 0}}, // DEGREE SIGN
  87     {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
  88     {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
  89     {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
  90     {{0, 0, 0, 0}}, // ACUTE ACCENT
  91     {{0, 0, 0, 0}}, // MICRO SIGN
  92     {{0, 0, 0, 0}}, // PILCROW SIGN
  93     {{0, 0, 0, 0}}, // MIDDLE DOT
  94     {{0, 0, 0, 0}}, // CEDILLA
  95     {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
  96     {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
  97     {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
  98     {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
  99     {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
 100     {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
 101     {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
 102     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
 103     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
 104     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 105     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
 106     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
 107     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
 108     {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
 109     {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
 110     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
 111     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
 112     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 113     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
 114     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
 115     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
 116     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 117     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
 118     {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
 119     {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
 120     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
 121     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
 122     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 123     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
 124     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
 125     {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
 126     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
 127     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
 128     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
 129     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 130     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
 131     {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
 132     {{'t', 'h', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
 133     {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
 134     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
 135     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
 136     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
 137     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
 138     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
 139     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
 140     {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
 141     {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
 142     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
 143     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
 144     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
 145     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
 146     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
 147     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
 148     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
 149     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
 150     {{'T', 'H', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
 151     {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
 152     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
 153     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
 154     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
 155     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
 156     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
 157     {{'/', 0, 0, 0}}, // DIVISION SIGN
 158     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
 159     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
 160     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
 161     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
 162     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
 163     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
 164     {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
 165     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
 166 };
 167
 168 CF_INLINE CFIndex __CFToASCIILatin1Fallback(UniChar character, uint8_t *bytes, CFIndex maxByteLen) {
 169     const uint8_t *losChars = (const uint8_t*)_toLossyASCIITable + (character - 0xA0) * sizeof(uint8_t[4]);
 170     CFIndex numBytes = 0;
 171     CFIndex idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
 172
 173     for (idx = 0;idx < max;idx++) {
 174         if (losChars[idx]) {
 175             if (maxByteLen) bytes[idx] = losChars[idx];
 176             ++numBytes;
 177         } else {
 178             break;
 179         }
 180     }
 181
 182     return numBytes;
 183 }
 184
 185 static CFIndex __CFDefaultToBytesFallbackProc(const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
 186     CFIndex processCharLen = 1, filledBytesLen = 1;
 187     uint8_t byte = '?';
 188
 189     if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
 190         byte = (uint8_t)(*characters - 0x80);
 191     } else if (*characters < 0x100) {
 192         *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
 193         return 1;
 194     } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
 195         processCharLen = (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
 196     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
 197         byte = ' ';
 198     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
 199         byte = ASCIINewLine;
 200     } else if (*characters == 0x2026) { // ellipsis
 201         if (0 == maxByteLen) {
 202             filledBytesLen = 3;
 203         } else if (maxByteLen > 2) {
 204             memset(bytes, '.', 3);
 205             *usedByteLen = 3;
 206             return processCharLen;
 207         }
 208     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
 209         UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
 210
 211         (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
 212         if (*decomposed < 0x80) {
 213             byte = (uint8_t)(*decomposed);
 214         } else {
 215             UTF16Char theChar = *decomposed;
 216
 217             return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
 218         }
 219     }
 220
 221     if (maxByteLen) *bytes = byte;
 222     *usedByteLen = filledBytesLen;
 223     return processCharLen;
 224 }
 225
 226 static CFIndex __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
 227     if (maxCharLen) *characters = (UniChar)'?';
 228     *usedCharLen = 1;
 229     return 1;
 230 }
 231
 232 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
 233 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
 234
 235 #define EXTRA_BASE (0x0F00)
 236
 237 /* Wrapper funcs for non-standard converters
 238 */
 239 static CFIndex __CFToBytesCheapEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
 240     CFIndex processedCharLen = 0;
 241     CFIndex length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
 242     uint8_t byte;
 243
 244     while (processedCharLen < length) {
 245         if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters[processedCharLen], &byte)) break;
 246
 247         if (maxByteLen) bytes[processedCharLen] = byte;
 248         processedCharLen++;
 249     }
 250
 251     *usedByteLen = processedCharLen;
 252     return processedCharLen;
 253 }
 254
 255 static CFIndex __CFToUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
 256     CFIndex processedByteLen = 0;
 257     CFIndex length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
 258     UniChar character;
 259
 260     while (processedByteLen < length) {
 261         if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], &character)) break;
 262
 263         if (maxCharLen) characters[processedByteLen] = character;
 264         processedByteLen++;
 265     }
 266
 267     *usedCharLen = processedByteLen;
 268     return processedByteLen;
 269 }
 270
 271 static CFIndex __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
 272     CFIndex processedByteLen = 0;
 273     CFIndex theUsedCharLen = 0;
 274     UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
 275     CFIndex usedLen;
 276     UniChar character;
 277     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 278
 279     while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 280         if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], &character)) break;
 281
 282         if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
 283             CFIndex idx;
 284
 285             usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
 286             *usedCharLen = theUsedCharLen;
 287
 288             for (idx = 0;idx < usedLen;idx++) {
 289                 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
 290                     if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 291                     theUsedCharLen += 2;
 292                     if (maxCharLen) {
 293                         charBuffer[idx] = charBuffer[idx] - 0x10000;
 294                         *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
 295                         *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
 296                     }
 297                 } else {
 298                     if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 299                     ++theUsedCharLen;
 300                     *(characters++) = charBuffer[idx];
 301                 }
 302             }
 303         } else {
 304             if (maxCharLen) *(characters++) = character;
 305             ++theUsedCharLen;
 306         }
 307         processedByteLen++;
 308     }
 309
 310     *usedCharLen = theUsedCharLen;
 311     return processedByteLen;
 312 }
 313
 314 static CFIndex __CFToBytesStandardEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
 315     CFIndex processedCharLen = 0;
 316     uint8_t byte;
 317     CFIndex usedLen;
 318
 319     *usedByteLen = 0;
 320
 321     while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
 322         if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters, numChars, &byte))) break;
 323
 324         if (maxByteLen) bytes[*usedByteLen] = byte;
 325         (*usedByteLen)++;
 326         characters += usedLen;
 327         numChars -= usedLen;
 328         processedCharLen += usedLen;
 329     }
 330
 331     return processedCharLen;
 332 }
 333
 334 static CFIndex __CFToUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
 335     CFIndex processedByteLen = 0;
 336     UniChar charBuffer[__CFMaximumConvertedLength];
 337     CFIndex usedLen;
 338
 339     *usedCharLen = 0;
 340
 341     while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
 342         if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
 343
 344         if (maxCharLen) {
 345             CFIndex idx;
 346
 347             if (*usedCharLen + usedLen > maxCharLen) break;
 348
 349             for (idx = 0;idx < usedLen;idx++) {
 350                 characters[*usedCharLen + idx] = charBuffer[idx];
 351             }
 352         }
 353         *usedCharLen += usedLen;
 354         processedByteLen++;
 355     }
 356
 357     return processedByteLen;
 358 }
 359
 360 static CFIndex __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
 361     CFIndex processedByteLen = 0;
 362     UniChar charBuffer[__CFMaximumConvertedLength];
 363     UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
 364     CFIndex usedLen;
 365     CFIndex decompedLen;
 366     CFIndex idx, decompIndex;
 367     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 368     CFIndex theUsedCharLen = 0;
 369
 370     while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 371         if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
 372
 373         for (idx = 0;idx < usedLen;idx++) {
 374             if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
 375                 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
 376                 *usedCharLen = theUsedCharLen;
 377
 378                 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
 379                     if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
 380                         if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 381                         theUsedCharLen += 2;
 382                         if (maxCharLen) {
 383                             charBuffer[idx] = charBuffer[idx] - 0x10000;
 384                             *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
 385                             *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
 386                         }
 387                     } else {
 388                         if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 389                         ++theUsedCharLen;
 390                         *(characters++) = charBuffer[idx];
 391                     }
 392                 }
 393             } else {
 394                 if (maxCharLen) *(characters++) = charBuffer[idx];
 395                 ++theUsedCharLen;
 396             }
 397         }
 398         processedByteLen++;
 399     }
 400
 401     *usedCharLen = theUsedCharLen;
 402     return processedByteLen;
 403 }
 404
 405 static CFIndex __CFToBytesCheapMultiByteWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
 406     CFIndex processedCharLen = 0;
 407     uint8_t byteBuffer[__CFMaximumConvertedLength];
 408     CFIndex usedLen;
 409
 410     *usedByteLen = 0;
 411
 412     while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
 413         if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
 414
 415         if (maxByteLen) {
 416             CFIndex idx;
 417
 418             if (*usedByteLen + usedLen > maxByteLen) break;
 419
 420             for (idx = 0;idx <usedLen;idx++) {
 421                 bytes[*usedByteLen + idx] = byteBuffer[idx];
 422             }
 423         }
 424
 425         *usedByteLen += usedLen;
 426         processedCharLen++;
 427     }
 428
 429     return processedCharLen;
 430 }
 431
 432 static CFIndex __CFToUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
 433     CFIndex processedByteLen = 0;
 434     UniChar character;
 435     CFIndex usedLen;
 436
 437     *usedCharLen = 0;
 438
 439     while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
 440         if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes, numBytes, &character))) break;
 441
 442         if (maxCharLen) *(characters++) = character;
 443         (*usedCharLen)++;
 444         processedByteLen += usedLen;
 445         bytes += usedLen;
 446         numBytes -= usedLen;
 447     }
 448
 449     return processedByteLen;
 450 }
 451
 452 static CFIndex __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
 453     CFIndex processedByteLen = 0;
 454     UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
 455     UniChar character;
 456     CFIndex usedLen;
 457     CFIndex decomposedLen;
 458     CFIndex theUsedCharLen = 0;
 459     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 460
 461     while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 462         if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes, numBytes, &character))) break;
 463
 464         if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
 465             CFIndex idx;
 466
 467             decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
 468             *usedCharLen = theUsedCharLen;
 469
 470             for (idx = 0;idx < decomposedLen;idx++) {
 471                 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
 472                     if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 473                     theUsedCharLen += 2;
 474                     if (maxCharLen) {
 475                         charBuffer[idx] = charBuffer[idx] - 0x10000;
 476                         *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
 477                         *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
 478                     }
 479                 } else {
 480                     if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 481                     ++theUsedCharLen;
 482                     *(characters++) = charBuffer[idx];
 483                 }
 484             }
 485         } else {
 486             if (maxCharLen) *(characters++) = character;
 487             ++theUsedCharLen;
 488         }
 489
 490         processedByteLen += usedLen;
 491         bytes += usedLen;
 492         numBytes -= usedLen;
 493     }
 494     *usedCharLen = theUsedCharLen;
 495     return processedByteLen;
 496 }
 497
 498 /* static functions
 499 */
 500 CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition, CFStringEncoding encoding) {
 501 #define NUM_OF_ENTRIES_CYCLE (10)
 502     static uint32_t _currentIndex = 0;
 503     static uint32_t _allocatedSize = 0;
 504     static _CFEncodingConverter *_allocatedEntries = NULL;
 505     _CFEncodingConverter *converter;
 506
 507
 508     if ((_currentIndex + 1) >= _allocatedSize) {
 509         _currentIndex = 0;
 510         _allocatedSize = 0;
 511         _allocatedEntries = NULL;
 512     }
 513     if (_allocatedEntries == NULL) { // Not allocated yet
 514         _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
 515         _allocatedSize = NUM_OF_ENTRIES_CYCLE;
 516         converter = &(_allocatedEntries[_currentIndex]);
 517     } else {
 518         converter = &(_allocatedEntries[++_currentIndex]);
 519     }
 520
 521     memset(converter, 0, sizeof(_CFEncodingConverter));
 522
 523     converter->definition = definition;
 524
 525     switch (definition->encodingClass) {
 526         case kCFStringEncodingConverterStandard:
 527             converter->toBytes = NULL;
 528             converter->toUnicode = NULL;
 529             converter->toCanonicalUnicode = NULL;
 530             break;
 531
 532         case kCFStringEncodingConverterCheapEightBit:
 533             converter->toBytes = __CFToBytesCheapEightBitWrapper;
 534             converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
 535             converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
 536             break;
 537
 538         case kCFStringEncodingConverterStandardEightBit:
 539             converter->toBytes = __CFToBytesStandardEightBitWrapper;
 540             converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
 541             converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
 542             break;
 543
 544         case kCFStringEncodingConverterCheapMultiByte:
 545             converter->toBytes = __CFToBytesCheapMultiByteWrapper;
 546             converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
 547             converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
 548             break;
 549
 550 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
 551         case kCFStringEncodingConverterICU:
 552             converter->toBytes = (_CFToBytesProc)__CFStringEncodingGetICUName(encoding);
 553             break;
 554 #endif
 555
 556         case kCFStringEncodingConverterPlatformSpecific:
 557             break;
 558
 559         default: // Shouln't be here
 560             return NULL;
 561     }
 562
 563     converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
 564     converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
 565
 566     return converter;
 567 }
 568
 569 CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(CFStringEncoding encoding) {
 570     switch (encoding) {
 571         case kCFStringEncodingUTF8:
 572             return &__CFConverterUTF8;
 573
 574         case kCFStringEncodingMacRoman:
 575             return &__CFConverterMacRoman;
 576
 577         case kCFStringEncodingWindowsLatin1:
 578             return &__CFConverterWinLatin1;
 579
 580         case kCFStringEncodingASCII:
 581             return &__CFConverterASCII;
 582
 583         case kCFStringEncodingISOLatin1:
 584             return &__CFConverterISOLatin1;
 585
 586
 587         case kCFStringEncodingNextStepLatin:
 588             return &__CFConverterNextStepLatin;
 589
 590
 591         default:
 592             return __CFStringEncodingGetExternalConverter(encoding);
 593     }
 594 }
 595
 596 static const _CFEncodingConverter *__CFGetConverter(uint32_t encoding) {
 597     const _CFEncodingConverter *converter = NULL;
 598     const _CFEncodingConverter **commonConverterSlot = NULL;
 599     static _CFEncodingConverter *commonConverters[3] = {NULL, NULL, NULL}; // UTF8, MacRoman/WinLatin1, and the default encoding*
 600     static CFMutableDictionaryRef mappingTable = NULL;
 601     static CFSpinLock_t lock = CFSpinLockInit;
 602
 603     switch (encoding) {
 604         case kCFStringEncodingUTF8: commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[0]); break;
 605
 606             /* the swith here should avoid possible bootstrap issues in the default: case below when invoked from CFStringGetSystemEncoding() */
 607 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
 608         case kCFStringEncodingMacRoman: commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[1]); break;
 609 #elif DEPLOYMENT_TARGET_WINDOWS
 610         case kCFStringEncodingWindowsLatin1: commonConverterSlot = (const _CFEncodingConverter **)(&(commonConverters[1])); break;
 611 #else
 612 #warning This case must match __defaultEncoding value defined in CFString.c
 613         case kCFStringEncodingISOLatin1: commonConverterSlot = (const _CFEncodingConverter **)(&(commonConverters[1])); break;
 614 #endif
 615
 616         default: if (CFStringGetSystemEncoding() == encoding) commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[2]); break;
 617     }
 618
 619     __CFSpinLock(&lock);
 620     converter = ((NULL == commonConverterSlot) ? ((NULL == mappingTable) ? NULL : (const _CFEncodingConverter *)CFDictionaryGetValue(mappingTable, (const void *)(uintptr_t)encoding)) : *commonConverterSlot);
 621     __CFSpinUnlock(&lock);
 622
 623     if (NULL == converter) {
 624         const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(encoding);
 625
 626         if (NULL != definition) {
 627             __CFSpinLock(&lock);
 628             converter = ((NULL == commonConverterSlot) ? ((NULL == mappingTable) ? NULL : (const _CFEncodingConverter *)CFDictionaryGetValue(mappingTable, (const void *)(uintptr_t)encoding)) : *commonConverterSlot);
 629
 630             if (NULL == converter) {
 631                 converter = __CFEncodingConverterFromDefinition(definition, encoding);
 632
 633                 if (NULL == commonConverterSlot) {
 634                     if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
 635
 636                     CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)encoding, converter);
 637                 } else {
 638                     *commonConverterSlot = converter;
 639                 }
 640             }
 641             __CFSpinUnlock(&lock);
 642         }
 643     }
 644
 645     return converter;
 646 }
 647
 648 /* Public API
 649 */
 650 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
         /* Converts numChars UTF-16 units from `characters` into `encoding`, storing the result in `bytes`.
          *
          *   encoding     target encoding; kCFStringEncodingUTF8 has its own path, every other value goes
          *                through the converter returned by __CFGetConverter()
          *   flags        kCFStringEncoding* option bits (lossy conversion, combining-character handling,
          *                canonical decomposition)
          *   usedCharLen  optional out: number of input characters consumed
          *   bytes        output buffer
          *   maxByteLen   capacity of `bytes`; when it is 0 the generic path neither bounds the loop by
          *                output size nor stores the lossy byte itself (CFStringEncodingByteLengthForCharacters
          *                calls with NULL/0 to measure only)
          *   usedByteLen  optional out: number of bytes produced
          *
          * Returns kCFStringEncodingConversionSuccess, kCFStringEncodingInsufficientOutputBufferLength,
          * kCFStringEncodingInvalidInputStream or kCFStringEncodingConverterUnavailable. ICU and
          * platform-specific converters are delegated to wholesale, out-parameters included.
          */
 651     if (encoding == kCFStringEncodingUTF8) {
             /* Lazily cached toBytes routine of the UTF-8 converter definition. */
 652         static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
 653         CFIndex convertedCharLen;
 654         CFIndex usedLen;
 655
 656
             /* With either canonical flag set, CFUniCharDecompose() emits the UTF-8 output itself; its
              * last argument is true only when the HFS+ flag is set. */
 657         if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
 658             (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
 659         } else {
 660             if (!__CFToUTF8) {
 661                 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 662                 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
 663             }
 664             convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, &usedLen);
 665         }
 666         if (usedCharLen) *usedCharLen = convertedCharLen;
 667         if (usedByteLen) *usedByteLen = usedLen;
 668
 669         if (convertedCharLen == numChars) {
 670             return kCFStringEncodingConversionSuccess;
 671         } else if ((maxByteLen > 0) && ((maxByteLen - usedLen) < 10)) { // fewer than 10 bytes left: the output buffer may simply be full
 672             UTF16Char character = characters[convertedCharLen];
 673
                 /* The stop position still counts as bad input when it holds a lone low surrogate, or a
                  * high surrogate that is the last unit or is not followed by a low surrogate. */
 674             if (((character >= kSurrogateLowStart) && (character <= kSurrogateLowEnd)) || ((character >= kSurrogateHighStart) && (character <= kSurrogateHighEnd) && ((1 == (numChars - convertedCharLen)) || (characters[convertedCharLen + 1] < kSurrogateLowStart) || (characters[convertedCharLen + 1] > kSurrogateLowEnd)))) return kCFStringEncodingInvalidInputStream;
 675
 676             return kCFStringEncodingInsufficientOutputBufferLength;
 677         } else {
 678             return kCFStringEncodingInvalidInputStream;
 679         }
 680     } else {
 681         const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 682         CFIndex usedLen = 0;            /* input characters consumed so far */
 683         CFIndex localUsedByteLen;       /* bytes reported by the most recent conversion call */
 684         CFIndex theUsedByteLen = 0;     /* output bytes accumulated so far */
 685         uint32_t theResult = kCFStringEncodingConversionSuccess;
 686         CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
 687         CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
 688
 689         if (!converter) return kCFStringEncodingConverterUnavailable;
 690
             /* Choose the combining-character strategy:
              *  - SubstituteCombinings: combining marks are only singled out when lossy conversion is
              *    not allowed; precomposition is never attempted.
              *  - otherwise: combining marks are recognised and, unless IgnoreCombinings is set,
              *    precomposition is enabled and ComposeCombinings is added to the flags. */
 691         if (flags & kCFStringEncodingSubstituteCombinings) {
 692             if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->definition->isValidCombiningChar;
 693         } else {
 694             isValidCombiningChar = converter->definition->isValidCombiningChar;
 695             if (!(flags & kCFStringEncodingIgnoreCombinings)) {
 696                 toBytesPrecompose = converter->definition->toBytesPrecompose;
 697                 flags |= kCFStringEncodingComposeCombinings;
 698             }
 699         }
 700
 701 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
             /* NOTE(review): for ICU converters the toBytes slot is handed over as a C string,
              * presumably the ICU converter name - confirm against the converter setup code. */
 702         if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToBytes((const char *)converter->toBytes, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
 703 #endif
 704
 705         /* Platform converter */
 706         if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
 707
             /* Each pass converts as much of the remaining input as possible. When the converter stops
              * early, the branches below decide what to do with the character at `usedLen`. */
 708         while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
 709             if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
 710                 CFIndex dummy;
 711
                     /* Case 1: conversion stopped on a combining mark that follows converted text. */
 712                 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
 713                     if (toBytesPrecompose) {
 714                         CFIndex localUsedLen = usedLen;
 715
                             /* Walk back to the first non-combining character before the mark; the guard keeps
                              * the index from dropping below 0 when only combining marks precede it. */
 716                         while ((usedLen > 0) && isValidCombiningChar(characters[--usedLen]));
 717                         theUsedByteLen += localUsedByteLen;
                             /* Take back the bytes already emitted for the characters just stepped over:
                              * re-measure them for multi-byte encodings, otherwise drop a single byte. */
 718                         if (converter->definition->maxBytesPerChar > 1) {
 719                             TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
 720                             theUsedByteLen -= localUsedByteLen;
 721                         } else {
 722                             theUsedByteLen--;
 723                         }
 724                         if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
 725                             usedLen += localUsedLen;
 726                             if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
 727                                 theUsedByteLen += localUsedByteLen;
 728                                 theResult = kCFStringEncodingInvalidInputStream;
 729                                 break;
 730                             }
 731                         } else if (flags & kCFStringEncodingAllowLossyConversion) {
 732                             uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 733
 734                             if (lossyByte) {
                                     /* One lossy byte stands in for the base character and every combining mark
                                      * after it; the scan stops at the end of the input. */
 735                                 while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
 736                                 localUsedByteLen = 1;
 737                                 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 738                             } else {
                                     /* No lossy byte configured: step over the base character and hand the rest
                                      * to the converter's fallback routine. */
 739                                 ++usedLen;
 740                                 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 741                             }
 742                         } else {
 743                             theResult = kCFStringEncodingInvalidInputStream;
 744                             break;
 745                         }
 746                     } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
 747                         theUsedByteLen += localUsedByteLen;
 748                         theResult = kCFStringEncodingInsufficientOutputBufferLength;
 749                         break;
 750                     } else if (flags & kCFStringEncodingIgnoreCombinings) {
                             /* Skip the whole run of combining marks without producing output. */
 751                         while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
 752                     } else {
                             /* Replace the combining mark with the lossy byte, or with fallback output. */
 753                         uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 754
 755                         theUsedByteLen += localUsedByteLen;
 756                         if (lossyByte) {
 757                             ++usedLen;
 758                             localUsedByteLen = 1;
 759                             if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 760                         } else {
 761                             usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 762                         }
 763                     }
                     /* Case 2: the output is exactly full, or a measuring call (NULL output) can still
                      * convert the next characters - the stop was caused by lack of space. */
 764                 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
 765                     theUsedByteLen += localUsedByteLen;
 766
                         /* In lossy mode without a lossy byte, let the fallback consume trailing characters
                          * for as long as it reports no output bytes. */
 767                     if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
 768                         CFIndex localUsedLen;
 769
 770                         localUsedByteLen = 0;
 771                         while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
 772                     }
 773                     if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
 774                     break;
                     /* Case 3: unconvertible character, lossy conversion allowed. */
 775                 } else if (flags & kCFStringEncodingAllowLossyConversion) {
 776                     uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 777
 778                     theUsedByteLen += localUsedByteLen;
 779                     if (lossyByte) {
 780                         ++usedLen;
 781                         localUsedByteLen = 1;
 782                         if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 783                     } else {
 784                         usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 785                     }
                     /* Case 4: unconvertible character and no lossy conversion - give up. */
 786                 } else {
 787                     theUsedByteLen += localUsedByteLen;
 788                     theResult = kCFStringEncodingInvalidInputStream;
 789                     break;
 790                 }
 791             }
                 /* Account for the bytes produced by the last call made in this pass. */
 792             theUsedByteLen += localUsedByteLen;
 793         }
 794
             /* The loop can also end because the output filled up while input remains; apply the same
              * zero-output fallback drain as above before reporting a short buffer. */
 795         if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
 796             if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
 797                 CFIndex localUsedLen;
 798
 799                 localUsedByteLen = 0;
 800                 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
 801             }
 802             if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
 803         }
 804         if (usedByteLen) *usedByteLen = theUsedByteLen;
 805         if (usedCharLen) *usedCharLen = usedLen;
 806
 807         return theResult;
 808     }
 809 }
 810
 811 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
         /* Converts numBytes bytes in `encoding` into UTF-16 units stored in `characters`.
          *
          *   usedByteLen  optional out: number of input bytes consumed
          *   maxCharLen   capacity of `characters`; when it is 0 the loop is not bounded by output size
          *   usedCharLen  optional out: number of UTF-16 units produced
          *
          * Returns kCFStringEncodingConversionSuccess, kCFStringEncodingInsufficientOutputBufferLength,
          * kCFStringEncodingInvalidInputStream or kCFStringEncodingConverterUnavailable. ICU and
          * platform-specific converters are delegated to wholesale, out-parameters included.
          */
 812     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 813     CFIndex usedLen = 0;            /* input bytes consumed so far */
 814     CFIndex theUsedCharLen = 0;     /* output characters accumulated so far */
 815     CFIndex localUsedCharLen;       /* characters reported by the most recent conversion call */
 816     uint32_t theResult = kCFStringEncodingConversionSuccess;
 817
 818     if (!converter) return kCFStringEncodingConverterUnavailable;
 819
 820 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
         /* NOTE(review): the toBytes slot (not toUnicode) is handed over as a C string here as well,
          * presumably the ICU converter name - confirm against the converter setup code. */
 821     if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToUnicode((const char *)converter->toBytes, flags, bytes, numBytes, usedByteLen, characters, maxCharLen, usedCharLen);
 822 #endif
 823
 824     /* Platform converter */
 825     if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, usedByteLen, characters, maxCharLen, usedCharLen);
 826
         /* Each pass converts as much of the remaining input as possible; an early stop is classified below. */
 827     while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 828         if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
 829             CFIndex tempUsedCharLen;
 830
                 /* Out of space when the output is exactly full, or when (a canonical flag is set or exactly
                  * one slot is left) and a measuring call with NULL output can still convert the next bytes. */
 831             if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || (((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) || (maxCharLen == theUsedCharLen + localUsedCharLen + 1)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
 832                 theUsedCharLen += localUsedCharLen;
 833                 theResult = kCFStringEncodingInsufficientOutputBufferLength;
 834                 break;
 835             } else if (flags & kCFStringEncodingAllowLossyConversion) {
                     /* Unconvertible bytes: let the converter's fallback routine produce a replacement. */
 836                 theUsedCharLen += localUsedCharLen;
 837                 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
 838             } else {
 839                 theUsedCharLen += localUsedCharLen;
 840                 theResult = kCFStringEncodingInvalidInputStream;
 841                 break;
 842             }
 843         }
             /* Account for the characters produced by the last call made in this pass. */
 844         theUsedCharLen += localUsedCharLen;
 845     }
 846
         /* Input left over with a bounded output and no earlier error means the output ran out. */
 847     if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
 848         theResult = kCFStringEncodingInsufficientOutputBufferLength;
 849     }
 850     if (usedCharLen) *usedCharLen = theUsedCharLen;
 851     if (usedByteLen) *usedByteLen = usedLen;
 852
 853     return theResult;
 854 }
 855
 856 CF_PRIVATE bool CFStringEncodingIsValidEncoding(uint32_t encoding) {
         /* An encoding counts as valid when a converter definition can be obtained for it. */
 857     return (NULL != CFStringEncodingGetConverter(encoding));
 858 }
 859
 860 CF_PRIVATE CFIndex CFStringEncodingCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
         /* Returns the number of UTF-16 units needed to hold `numBytes` bytes of `encoding`, or 0 when
          * no converter exists or when the input cannot be converted and lossy conversion is not allowed. */
 861     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 862     const CFStringEncodingConverter *definition;
 863     CFIndex charCount = 0;
 864
 865     if (NULL == converter) return 0;
 866     definition = converter->definition;
 867
 868 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
 869     if (kCFStringEncodingConverterICU == definition->encodingClass) return __CFStringEncodingICUCharLength((const char *)converter->toBytes, flags, bytes, numBytes);
 870 #endif
 871
 872     if (kCFStringEncodingConverterPlatformSpecific == definition->encodingClass) return __CFStringEncodingPlatformCharLengthForBytes(encoding, flags, bytes, numBytes);
 873
         /* Single-byte encodings: one character per byte. */
 874     if (1 == definition->maxBytesPerChar) return numBytes;
 875
         /* Use the definition's own length routine when it provides one. */
 876     if (NULL != definition->toUnicodeLen) return definition->toUnicodeLen(flags, bytes, numBytes);
 877
         /* Otherwise measure by converting with a NULL output, using the fallback routine for any
          * stretch the converter refuses. */
 878     while (numBytes > 0) {
 879         CFIndex producedChars;
 880         CFIndex consumedBytes = TO_UNICODE(converter, flags, bytes, numBytes, NULL, 0, &producedChars);
 881
 882         bytes += consumedBytes;
 883         numBytes -= consumedBytes;
 884         charCount += producedChars;
 885
 886         if (numBytes <= 0) {
 887             break;
 888         }
 889
 890         if (!(flags & kCFStringEncodingAllowLossyConversion)) {
 891             return 0;
 892         }
 893
 894         consumedBytes = TO_UNICODE_FALLBACK(converter, bytes, numBytes, NULL, 0, &producedChars);
 895
 896         bytes += consumedBytes;
 897         numBytes -= consumedBytes;
 898         charCount += producedChars;
 899     }
 900
 901     return charCount;
 902 }
 903
 904 CF_PRIVATE CFIndex CFStringEncodingByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
         /* Returns the number of bytes needed to encode `numChars` UTF-16 units in `encoding`, or 0
          * when no converter exists or the measuring conversion does not succeed. */
 905     const _CFEncodingConverter *entry = __CFGetConverter(encoding);
 906     const CFStringEncodingConverter *definition;
 907     CFIndex byteCount;
 908
 909     if (NULL == entry) return 0;
 910
 911     definition = entry->definition;
 912
 913 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
 914     if (kCFStringEncodingConverterICU == definition->encodingClass) return __CFStringEncodingICUByteLength((const char *)entry->toBytes, flags, characters, numChars);
 915 #endif
 916
 917     if (kCFStringEncodingConverterPlatformSpecific == definition->encodingClass) return __CFStringEncodingPlatformByteLengthForCharacters(encoding, flags, characters, numChars);
 918
         /* Single-byte encodings: one byte per character. */
 919     if (1 == definition->maxBytesPerChar) return numChars;
 920
         /* Use the definition's own length routine when it provides one. */
 921     if (NULL != definition->toBytesLen) return definition->toBytesLen(flags, characters, numChars);
 922
         /* Otherwise run a conversion with no output buffer purely to obtain the byte count. */
 923     if (kCFStringEncodingConversionSuccess != CFStringEncodingUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &byteCount)) return 0;
 924
 925     return byteCount;
 926 }
 927
 928 void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
         /* Installs fallback routines on the cached converter for `encoding`. A NULL argument restores
          * the routine from the converter definition, or the file's default routine when no definition
          * is returned. Does nothing when the encoding has no converter. */
 929     _CFEncodingConverter *entry = (_CFEncodingConverter *)__CFGetConverter(encoding);
 930     const CFStringEncodingConverter *definition;
 931
 932     if (NULL == entry) return;
 933
 934     definition = CFStringEncodingGetConverter(encoding);
 935     entry->toBytesFallback = (toBytes ? toBytes : (definition ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc));
 936     entry->toUnicodeFallback = (toUnicode ? toUnicode : (definition ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc));
 937 }
 938
 939 CF_PRIVATE const CFStringEncodingConverter *CFStringEncodingGetConverter(uint32_t encoding) {
         /* Returns the converter definition behind the cached converter, or NULL if there is none. */
 940     const _CFEncodingConverter *entry = __CFGetConverter(encoding);
 941     if (NULL == entry) return NULL;
 942     return entry->definition;
 943 }
 944
 945 static const CFStringEncoding __CFBuiltinEncodings[] = {
         /* Base table for CFStringEncodingListOfAvailableEncodings(): returned as-is when no ICU or
          * platform converter lists are available, otherwise copied to the front of the merged list.
          * The final entry is kCFStringEncodingInvalidId. */
 946     kCFStringEncodingMacRoman,
 947     kCFStringEncodingWindowsLatin1,
 948     kCFStringEncodingISOLatin1,
 949     kCFStringEncodingNextStepLatin,
 950     kCFStringEncodingASCII,
 951     kCFStringEncodingUTF8,
 952     /* These seven are available only at the CFString level */
 953     kCFStringEncodingNonLossyASCII,
 954
 955     kCFStringEncodingUTF16,
 956     kCFStringEncodingUTF16BE,
 957     kCFStringEncodingUTF16LE,
 958
 959     kCFStringEncodingUTF32,
 960     kCFStringEncodingUTF32BE,
 961     kCFStringEncodingUTF32LE,
 962
 963     kCFStringEncodingInvalidId,
 964 };
 965
 966 static CFComparisonResult __CFStringEncodingComparator(const void *v1, const void *v2, void *context) {
         /* Orders encodings by their low 16 bits first and by the full value when those are equal.
          * The result is the signed difference of the compared values; `context` is unused. */
 967     const CFStringEncoding lhs = *(const CFStringEncoding *)v1;
 968     const CFStringEncoding rhs = *(const CFStringEncoding *)v2;
 969     const CFComparisonResult lhsBase = lhs & 0xFFFF, rhsBase = rhs & 0xFFFF;
 970     return ((lhsBase != rhsBase) ? lhsBase - rhsBase : ((CFComparisonResult)lhs - (CFComparisonResult)rhs));
 971 }
 972
 973 static void __CFStringEncodingFliterDupes(CFStringEncoding *encodings, CFIndex numSlots) {
         /* Removes adjacent duplicates in place by shifting the tail down one slot per duplicate.
          * An element equal to kCFStringEncodingInvalidId at the very front is dropped too, because
          * that is the initial "previous" value. The new element count is not reported. */
 974     const CFStringEncoding *end = encodings + numSlots;
 975     CFStringEncoding previous = kCFStringEncodingInvalidId;
 976
 977     while (encodings < end) {
 978         if (previous != *encodings) {
 979             previous = *(encodings++);
 980         } else {
 981             if ((end - encodings) > 1) memmove(encodings, encodings + 1, sizeof(CFStringEncoding) * ((end - encodings) - 1));
 982             --end;
 983         }
 984     }
 985 }
 986
 987 CF_PRIVATE const CFStringEncoding *CFStringEncodingListOfAvailableEncodings(void) {
         /* Returns the list of available encodings, built on first use and then kept in a
          * function-static pointer. The list is the built-in table, optionally merged with the ICU
          * and platform converter lists, sorted and stripped of adjacent duplicates. */
 988     static const CFStringEncoding *encodings = NULL;
 989
 990     if (NULL == encodings) {
             /* Start from the built-in table (the cast drops const; the table itself is not written). */
 991         CFStringEncoding *list = (CFStringEncoding *)__CFBuiltinEncodings;
 992         CFIndex numICUConverters = 0, numPlatformConverters = 0;
 993 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
 994         CFStringEncoding *icuConverters = __CFStringEncodingCreateICUEncodings(NULL, &numICUConverters);
 995 #else
 996         CFStringEncoding *icuConverters = NULL;
 997 #endif
 998         CFStringEncoding *platformConverters = __CFStringEncodingCreateListOfAvailablePlatformConverters(NULL, &numPlatformConverters);
 999
1000         if ((NULL != icuConverters) || (NULL != platformConverters)) {
                 /* Room for every built-in entry (its kCFStringEncodingInvalidId entry included) plus both extra lists. */
1001             CFIndex numSlots = (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)) + numICUConverters + numPlatformConverters;
1002
1003             list = (CFStringEncoding *)CFAllocatorAllocate(NULL, sizeof(CFStringEncoding) * numSlots, 0);
1004
1005             memcpy(list, __CFBuiltinEncodings, sizeof(__CFBuiltinEncodings));
1006
                 /* Append each extra list after the built-ins and release the source array once copied. */
1007             if (NULL != icuConverters) {
1008                 memcpy(list + (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)), icuConverters, sizeof(CFStringEncoding) * numICUConverters);
1009                 CFAllocatorDeallocate(NULL, icuConverters);
1010             }
1011
1012             if (NULL != platformConverters) {
1013                 memcpy(list + (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)) + numICUConverters, platformConverters, sizeof(CFStringEncoding) * numPlatformConverters);
1014                 CFAllocatorDeallocate(NULL, platformConverters);
1015             }
1016
                 /* NOTE(review): the dedupe pass does not report the shortened length; callers presumably
                  * stop at the kCFStringEncodingInvalidId entry copied from the built-in table - confirm
                  * that it still sorts last. */
1017             CFQSortArray(list, numSlots, sizeof(CFStringEncoding), (CFComparatorFunction)__CFStringEncodingComparator, NULL);
1018             __CFStringEncodingFliterDupes(list, numSlots);
1019         }
             /* Publish only if the cached pointer is still NULL; if the swap fails and the list was
              * heap-allocated here, free it again. */
1020         if (!OSAtomicCompareAndSwapPtrBarrier(NULL, list, (void * volatile *)&encodings) && (list != __CFBuiltinEncodings)) CFAllocatorDeallocate(NULL, list);
1021     }
1022
1023     return encodings;
1024 }
1025
1026 #undef TO_BYTE /* from here on: drop helper macros, presumably defined earlier in this file, so they do not outlive it */
1027 #undef TO_UNICODE
1028 #undef ASCIINewLine
1029 #undef kSurrogateHighStart
1030 #undef kSurrogateHighEnd
1031 #undef kSurrogateLowStart
1032 #undef kSurrogateLowEnd
1033 #undef TO_BYTE_FALLBACK
1034 #undef TO_UNICODE_FALLBACK
1035 #undef EXTRA_BASE
1036 #undef NUM_OF_ENTRIES_CYCLE
1037