StringEncodings.subproj/CFStringEncodingConverter.c

   1 /*
   2  * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23 /*      CFStringEncodingConverter.c
  24         Copyright 1998-2002, Apple, Inc. All rights reserved.
  25         Responsibility: Aki Inoue
  26 */
  27
  28 #include "CFInternal.h"
  29 #include <CoreFoundation/CFArray.h>
  30 #include <CoreFoundation/CFDictionary.h>
  31 #include "CFUniChar.h"
  32 #include "CFUtilitiesPriv.h"
  33 #include "CFUnicodeDecomposition.h"
  34 #include "CFStringEncodingConverterExt.h"
  35 #include "CFStringEncodingConverterPriv.h"
  36 #include <stdlib.h>
  37 #if !defined(__MACOS8__)
  38 #ifdef __WIN32__
  39 #include <windows.h>
  40 #else // Mach, HP-UX, Solaris
  41 #include <pthread.h>
  42 #endif
  43 #endif __MACOS8__
  44
  45
  46 /* Macros
  47 */
  48 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->_toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->toBytes)(flags,chars,numChars,bytes,max,used))
  49 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->_toUnicode ?  (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->toUnicode)(flags,bytes,numBytes,chars,max,used))
  50
  51 #define LineSeparator 0x2028
  52 #define ParagraphSeparator 0x2029
  53 #define ASCIINewLine 0x0a
  54 #define kSurrogateHighStart 0xD800
  55 #define kSurrogateHighEnd 0xDBFF
  56 #define kSurrogateLowStart 0xDC00
  57 #define kSurrogateLowEnd 0xDFFF
  58
  59 /* Mapping 128..255 to lossy ASCII
  60 */
  61 static const struct {
  62     unsigned char chars[4];
  63 } _toLossyASCIITable[] = {
  64     {{' ', 0, 0, 0}}, // NO-BREAK SPACE
  65     {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
  66     {{'c', 0, 0, 0}}, // CENT SIGN
  67     {{'L', 0, 0, 0}}, // POUND SIGN
  68     {{'$', 0, 0, 0}}, // CURRENCY SIGN
  69     {{'Y', 0, 0, 0}}, // YEN SIGN
  70     {{'|', 0, 0, 0}}, // BROKEN BAR
  71     {{0, 0, 0, 0}}, // SECTION SIGN
  72     {{0, 0, 0, 0}}, // DIAERESIS
  73     {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
  74     {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
  75     {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
  76     {{0, 0, 0, 0}}, // NOT SIGN
  77     {{'-', 0, 0, 0}}, // SOFT HYPHEN
  78     {{'(', 'R', ')', 0}}, // REGISTERED SIGN
  79     {{0, 0, 0, 0}}, // MACRON
  80     {{0, 0, 0, 0}}, // DEGREE SIGN
  81     {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
  82     {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
  83     {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
  84     {{0, 0, 0, 0}}, // ACUTE ACCENT
  85     {{0, 0, 0, 0}}, // MICRO SIGN
  86     {{0, 0, 0, 0}}, // PILCROW SIGN
  87     {{0, 0, 0, 0}}, // MIDDLE DOT
  88     {{0, 0, 0, 0}}, // CEDILLA
  89     {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
  90     {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
  91     {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
  92     {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
  93     {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
  94     {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
  95     {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
  96     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
  97     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
  98     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
  99     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
 100     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
 101     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
 102     {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
 103     {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
 104     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
 105     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
 106     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 107     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
 108     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
 109     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
 110     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 111     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
 112     {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
 113     {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
 114     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
 115     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
 116     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 117     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
 118     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
 119     {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
 120     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
 121     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
 122     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
 123     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 124     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
 125     {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
 126     {{'t', 'h', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
 127     {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
 128     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
 129     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
 130     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
 131     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
 132     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
 133     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
 134     {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
 135     {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
 136     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
 137     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
 138     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
 139     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
 140     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
 141     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
 142     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
 143     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
 144     {{'T', 'H', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
 145     {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
 146     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
 147     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
 148     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
 149     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
 150     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
 151     {{'/', 0, 0, 0}}, // DIVISION SIGN
 152     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
 153     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
 154     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
 155     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
 156     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
 157     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
 158     {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
 159     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
 160 };
 161
 162 CF_INLINE UInt32 __CFToASCIILatin1Fallback(UniChar character, UInt8 *bytes, UInt32 maxByteLen) {
 163     const char *losChars = (const unsigned char*)_toLossyASCIITable + (character - 0xA0) * sizeof(unsigned char[4]);
 164     unsigned int numBytes = 0;
 165     int idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
 166
 167     for (idx = 0;idx < max;idx++) {
 168         if (losChars[idx]) {
 169             if (maxByteLen) bytes[idx] = losChars[idx];
 170             ++numBytes;
 171         } else {
 172             break;
 173         }
 174     }
 175
 176     return numBytes;
 177 }
 178
 179 static UInt32 __CFDefaultToBytesFallbackProc(const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 180     if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
 181         if (maxByteLen) *bytes = (UInt8)(*characters - 0x80);
 182         *usedByteLen = 1;
 183         return 1;
 184     } else if (*characters < 0x100) {
 185         *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
 186         return 1;
 187     } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
 188         if (maxByteLen) *bytes = '?';
 189         *usedByteLen = 1;
 190         return (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
 191     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
 192         if (maxByteLen) *bytes = ' ';
 193         *usedByteLen = 1;
 194         return 1;
 195     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
 196         if (maxByteLen) *bytes = ASCIINewLine;
 197         *usedByteLen = 1;
 198         return 1;
 199     } else if (!CFUniCharIsMemberOf(*characters, kCFUniCharLetterCharacterSet)) {
 200         *usedByteLen = 0;
 201         return 1;
 202     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
 203         UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
 204
 205         (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
 206         if (*decomposed < 0x80) {
 207             if (maxByteLen) *bytes = (UInt8)(*decomposed);
 208             *usedByteLen = 1;
 209             return 1;
 210         } else {
 211             UTF16Char theChar = *decomposed;
 212
 213             return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
 214         }
 215     } else {
 216         if (maxByteLen) *bytes = '?';
 217         *usedByteLen = 1;
 218         return 1;
 219     }
 220 }
 221
 222 static UInt32 __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 223     if (maxCharLen) *characters = (UniChar)'?';
 224     *usedCharLen = 1;
 225     return 1;
 226 }
 227
 228 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
 229 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
 230
 231 #define EXTRA_BASE (0x0F00)
 232
 233 /* Wrapper funcs for non-standard converters
 234 */
 235 static UInt32 __CFToBytesCheapEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 236     UInt32 processedCharLen = 0;
 237     UInt32 length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
 238     uint8_t byte;
 239
 240     while (processedCharLen < length) {
 241         if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], &byte)) break;
 242
 243         if (maxByteLen) bytes[processedCharLen] = byte;
 244         processedCharLen++;
 245     }
 246
 247     *usedByteLen = processedCharLen;
 248     return processedCharLen;
 249 }
 250
 251 static UInt32 __CFToUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 252     UInt32 processedByteLen = 0;
 253     UInt32 length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
 254     UniChar character;
 255
 256     while (processedByteLen < length) {
 257         if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
 258
 259         if (maxCharLen) characters[processedByteLen] = character;
 260         processedByteLen++;
 261     }
 262
 263     *usedCharLen = processedByteLen;
 264     return processedByteLen;
 265 }
 266
 267 static UInt32 __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 268     UInt32 processedByteLen = 0;
 269     UInt32 theUsedCharLen = 0;
 270     UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
 271     UInt32 usedLen;
 272     UniChar character;
 273     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 274
 275     while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 276         if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
 277
 278         if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
 279             uint32_t idx;
 280
 281             usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
 282             *usedCharLen = theUsedCharLen;
 283
 284             for (idx = 0;idx < usedLen;idx++) {
 285                 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
 286                     if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 287                     theUsedCharLen += 2;
 288                     if (maxCharLen) {
 289                         charBuffer[idx] = charBuffer[idx] - 0x10000;
 290                         *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
 291                         *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
 292                     }
 293                 } else {
 294                     if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 295                     ++theUsedCharLen;
 296                     *(characters++) = charBuffer[idx];
 297                 }
 298             }
 299         } else {
 300             if (maxCharLen) *(characters++) = character;
 301             ++theUsedCharLen;
 302         }
 303         processedByteLen++;
 304     }
 305
 306     *usedCharLen = theUsedCharLen;
 307     return processedByteLen;
 308 }
 309
 310 static UInt32 __CFToBytesStandardEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 311     UInt32 processedCharLen = 0;
 312     uint8_t byte;
 313     UInt32 usedLen;
 314
 315     *usedByteLen = 0;
 316
 317     while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
 318         if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters, numChars, &byte))) break;
 319
 320         if (maxByteLen) bytes[*usedByteLen] = byte;
 321         (*usedByteLen)++;
 322         characters += usedLen;
 323         numChars -= usedLen;
 324         processedCharLen += usedLen;
 325     }
 326
 327     return processedCharLen;
 328 }
 329
 330 static UInt32 __CFToUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 331     UInt32 processedByteLen = 0;
 332 #if defined(__MACOS8__) || defined(__WIN32__)
 333     UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
 334 #else
 335     UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
 336 #endif
 337     UInt32 usedLen;
 338
 339     *usedCharLen = 0;
 340
 341     while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
 342         if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
 343
 344         if (maxCharLen) {
 345             uint16_t idx;
 346
 347             if (*usedCharLen + usedLen > maxCharLen) break;
 348
 349             for (idx = 0;idx < usedLen;idx++) {
 350                 characters[*usedCharLen + idx] = charBuffer[idx];
 351             }
 352         }
 353         *usedCharLen += usedLen;
 354         processedByteLen++;
 355     }
 356
 357     return processedByteLen;
 358 }
 359
 360 static UInt32 __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 361     UInt32 processedByteLen = 0;
 362 #if defined(__MACOS8__) || defined(__WIN32__)
 363     UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
 364 #else
 365     UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
 366 #endif
 367     UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
 368     UInt32 usedLen;
 369     UInt32 decompedLen;
 370     UInt32 idx, decompIndex;
 371     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 372     UInt32 theUsedCharLen = 0;
 373
 374     while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 375         if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
 376
 377         for (idx = 0;idx < usedLen;idx++) {
 378             if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
 379                 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
 380                 *usedCharLen = theUsedCharLen;
 381
 382                 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
 383                     if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
 384                         if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 385                         theUsedCharLen += 2;
 386                         if (maxCharLen) {
 387                             charBuffer[idx] = charBuffer[idx] - 0x10000;
 388                             *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
 389                             *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
 390                         }
 391                     } else {
 392                         if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 393                         ++theUsedCharLen;
 394                         *(characters++) = charBuffer[idx];
 395                     }
 396                 }
 397             } else {
 398                 if (maxCharLen) *(characters++) = charBuffer[idx];
 399                 ++theUsedCharLen;
 400             }
 401         }
 402         processedByteLen++;
 403     }
 404
 405     *usedCharLen = theUsedCharLen;
 406     return processedByteLen;
 407 }
 408
 409 static UInt32 __CFToBytesCheapMultiByteWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 410     UInt32 processedCharLen = 0;
 411 #if defined(__MACOS8__) || defined(__WIN32__)
 412     uint8_t byteBuffer[20]; // Dynamic stack allocation is GNU specific
 413 #else
 414     uint8_t byteBuffer[((const _CFEncodingConverter*)converter)->maxLen];
 415 #endif
 416     UInt32 usedLen;
 417
 418     *usedByteLen = 0;
 419
 420     while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
 421         if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
 422
 423         if (maxByteLen) {
 424             uint16_t idx;
 425
 426             if (*usedByteLen + usedLen > maxByteLen) break;
 427
 428             for (idx = 0;idx <usedLen;idx++) {
 429                 bytes[*usedByteLen + idx] = byteBuffer[idx];
 430             }
 431         }
 432
 433         *usedByteLen += usedLen;
 434         processedCharLen++;
 435     }
 436
 437     return processedCharLen;
 438 }
 439
 440 static UInt32 __CFToUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 441     UInt32 processedByteLen = 0;
 442     UniChar character;
 443     UInt32 usedLen;
 444
 445     *usedCharLen = 0;
 446
 447     while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
 448         if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
 449
 450         if (maxCharLen) *(characters++) = character;
 451         (*usedCharLen)++;
 452         processedByteLen += usedLen;
 453         bytes += usedLen;
 454         numBytes -= usedLen;
 455     }
 456
 457     return processedByteLen;
 458 }
 459
 460 static UInt32 __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 461     UInt32 processedByteLen = 0;
 462     UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
 463     UniChar character;
 464     UInt32 usedLen;
 465     UInt32 decomposedLen;
 466     UInt32 theUsedCharLen = 0;
 467     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 468
 469     while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 470         if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
 471
 472         if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
 473             uint32_t idx;
 474
 475             decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
 476             *usedCharLen = theUsedCharLen;
 477
 478             for (idx = 0;idx < decomposedLen;idx++) {
 479                 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
 480                     if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 481                     theUsedCharLen += 2;
 482                     if (maxCharLen) {
 483                         charBuffer[idx] = charBuffer[idx] - 0x10000;
 484                         *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
 485                         *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
 486                     }
 487                 } else {
 488                     if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 489                     ++theUsedCharLen;
 490                     *(characters++) = charBuffer[idx];
 491                 }
 492             }
 493         } else {
 494             if (maxCharLen) *(characters++) = character;
 495             ++theUsedCharLen;
 496         }
 497
 498         processedByteLen += usedLen;
 499         bytes += usedLen;
 500         numBytes -= usedLen;
 501     }
 502     *usedCharLen = theUsedCharLen;
 503     return processedByteLen;
 504 }
 505
 506 /* static functions
 507 */
 508 static _CFConverterEntry __CFConverterEntryASCII = {
 509     kCFStringEncodingASCII, NULL,
 510     "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL}, NULL, NULL, NULL, NULL,
 511     kCFStringEncodingMacRoman // We use string encoding's script range here
 512 };
 513
 514 static _CFConverterEntry __CFConverterEntryISOLatin1 = {
 515     kCFStringEncodingISOLatin1, NULL,
 516     "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL}, NULL, NULL, NULL, NULL,
 517     kCFStringEncodingMacRoman // We use string encoding's script range here
 518 };
 519
 520 static _CFConverterEntry __CFConverterEntryMacRoman = {
 521     kCFStringEncodingMacRoman, NULL,
 522     "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL}, NULL, NULL, NULL, NULL,
 523     kCFStringEncodingMacRoman // We use string encoding's script range here
 524 };
 525
 526 static _CFConverterEntry __CFConverterEntryWinLatin1 = {
 527     kCFStringEncodingWindowsLatin1, NULL,
 528     "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL}, NULL, NULL, NULL, NULL,
 529     kCFStringEncodingMacRoman // We use string encoding's script range here
 530 };
 531
 532 static _CFConverterEntry __CFConverterEntryNextStepLatin = {
 533     kCFStringEncodingNextStepLatin, NULL,
 534     "Western (NextStep)", {"x-nextstep", NULL, NULL, NULL}, NULL, NULL, NULL, NULL,
 535     kCFStringEncodingMacRoman // We use string encoding's script range here
 536 };
 537
 538 static _CFConverterEntry __CFConverterEntryUTF8 = {
 539     kCFStringEncodingUTF8, NULL,
 540     "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL, NULL}, NULL, NULL, NULL, NULL,
 541     kCFStringEncodingUnicode // We use string encoding's script range here
 542 };
 543
 544 CF_INLINE _CFConverterEntry *__CFStringEncodingConverterGetEntry(UInt32 encoding) {
 545     switch (encoding) {
 546         case kCFStringEncodingInvalidId:
 547         case kCFStringEncodingASCII:
 548             return &__CFConverterEntryASCII;
 549
 550         case kCFStringEncodingISOLatin1:
 551             return &__CFConverterEntryISOLatin1;
 552
 553         case kCFStringEncodingMacRoman:
 554             return &__CFConverterEntryMacRoman;
 555
 556         case kCFStringEncodingWindowsLatin1:
 557             return &__CFConverterEntryWinLatin1;
 558
 559         case kCFStringEncodingNextStepLatin:
 560             return &__CFConverterEntryNextStepLatin;
 561
 562         case kCFStringEncodingUTF8:
 563             return &__CFConverterEntryUTF8;
 564
 565         default: {
 566             return NULL;
 567         }
 568     }
 569 }
 570
 571 CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition) {
 572 #define NUM_OF_ENTRIES_CYCLE (10)
 573     static CFSpinLock_t _indexLock = 0;
 574     static UInt32 _currentIndex = 0;
 575     static UInt32 _allocatedSize = 0;
 576     static _CFEncodingConverter *_allocatedEntries = NULL;
 577     _CFEncodingConverter *converter;
 578
 579
 580     __CFSpinLock(&_indexLock);
 581     if ((_currentIndex + 1) >= _allocatedSize) {
 582         _currentIndex = 0;
 583         _allocatedSize = 0;
 584         _allocatedEntries = NULL;
 585     }
 586     if (_allocatedEntries == NULL) { // Not allocated yet
 587         _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(NULL, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
 588         _allocatedSize = NUM_OF_ENTRIES_CYCLE;
 589         converter = &(_allocatedEntries[_currentIndex]);
 590     } else {
 591         converter = &(_allocatedEntries[++_currentIndex]);
 592     }
 593     __CFSpinUnlock(&_indexLock);
 594
 595     switch (definition->encodingClass) {
 596         case kCFStringEncodingConverterStandard:
 597             converter->toBytes = definition->toBytes;
 598             converter->toUnicode = definition->toUnicode;
 599             converter->toCanonicalUnicode = definition->toUnicode;
 600             converter->_toBytes = NULL;
 601             converter->_toUnicode = NULL;
 602             converter->maxLen = 2;
 603             break;
 604
 605         case kCFStringEncodingConverterCheapEightBit:
 606             converter->toBytes = __CFToBytesCheapEightBitWrapper;
 607             converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
 608             converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
 609             converter->_toBytes = definition->toBytes;
 610             converter->_toUnicode = definition->toUnicode;
 611             converter->maxLen = 1;
 612             break;
 613
 614         case kCFStringEncodingConverterStandardEightBit:
 615             converter->toBytes = __CFToBytesStandardEightBitWrapper;
 616             converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
 617             converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
 618             converter->_toBytes = definition->toBytes;
 619             converter->_toUnicode = definition->toUnicode;
 620             converter->maxLen = definition->maxDecomposedCharLen;
 621             break;
 622
 623         case kCFStringEncodingConverterCheapMultiByte:
 624             converter->toBytes = __CFToBytesCheapMultiByteWrapper;
 625             converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
 626             converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
 627             converter->_toBytes = definition->toBytes;
 628             converter->_toUnicode = definition->toUnicode;
 629             converter->maxLen = definition->maxBytesPerChar;
 630             break;
 631
 632         case kCFStringEncodingConverterPlatformSpecific:
 633             converter->toBytes = NULL;
 634             converter->toUnicode = NULL;
 635             converter->toCanonicalUnicode = NULL;
 636             converter->_toBytes = NULL;
 637             converter->_toUnicode = NULL;
 638             converter->maxLen = 0;
 639             converter->toBytesLen = NULL;
 640             converter->toUnicodeLen = NULL;
 641             converter->toBytesFallback = NULL;
 642             converter->toUnicodeFallback = NULL;
 643             converter->toBytesPrecompose = NULL;
 644             converter->isValidCombiningChar = NULL;
 645             return converter;
 646
 647         default: // Shouln't be here
 648             return NULL;
 649     }
 650
 651     converter->toBytesLen = (definition->toBytesLen ? definition->toBytesLen : (CFStringEncodingToBytesLenProc)(UInt32)definition->maxBytesPerChar);
 652     converter->toUnicodeLen = (definition->toUnicodeLen ? definition->toUnicodeLen : (CFStringEncodingToUnicodeLenProc)(UInt32)definition->maxDecomposedCharLen);
 653     converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
 654     converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
 655     converter->toBytesPrecompose = (definition->toBytesPrecompose ? definition->toBytesPrecompose : NULL);
 656     converter->isValidCombiningChar = (definition->isValidCombiningChar ? definition->isValidCombiningChar : NULL);
 657
 658     return converter;
 659 }
 660
 661 CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(_CFConverterEntry *entry) {
 662     if (!entry) return NULL;
 663
 664     switch (entry->encoding) {
 665         case kCFStringEncodingASCII:
 666             return &__CFConverterASCII;
 667
 668         case kCFStringEncodingISOLatin1:
 669             return &__CFConverterISOLatin1;
 670
 671         case kCFStringEncodingMacRoman:
 672             return &__CFConverterMacRoman;
 673
 674         case kCFStringEncodingWindowsLatin1:
 675             return &__CFConverterWinLatin1;
 676
 677         case kCFStringEncodingNextStepLatin:
 678             return &__CFConverterNextStepLatin;
 679
 680         case kCFStringEncodingUTF8:
 681             return &__CFConverterUTF8;
 682
 683         default:
 684             return NULL;
 685     }
 686 }
 687
 688 static const _CFEncodingConverter *__CFGetConverter(UInt32 encoding) {
 689     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
 690
 691     if (!entry) return NULL;
 692
 693     if (!entry->converter) {
 694         const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(entry);
 695
 696         if (definition) {
 697             entry->converter = __CFEncodingConverterFromDefinition(definition);
 698             entry->toBytesFallback = definition->toBytesFallback;
 699             entry->toUnicodeFallback = definition->toUnicodeFallback;
 700         }
 701     }
 702
 703     return (_CFEncodingConverter *)entry->converter;
 704 }
 705
 706 /* Public API
 707 */
 708 UInt32 CFStringEncodingUnicodeToBytes(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars, UInt32 *usedCharLen, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 709     if (encoding == kCFStringEncodingUTF8) {
 710         static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
 711         uint32_t convertedCharLen;
 712         uint32_t usedLen;
 713
 714
 715         if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
 716             (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
 717         } else {
 718             if (!__CFToUTF8) {
 719                 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 720                 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
 721             }
 722             convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, (UInt32 *)&usedLen);
 723         }
 724         if (usedCharLen) *usedCharLen = convertedCharLen;
 725         if (usedByteLen) *usedByteLen = usedLen;
 726
 727         if (convertedCharLen == numChars) {
 728             return kCFStringEncodingConversionSuccess;
 729         } else if (maxByteLen && (maxByteLen == usedLen)) {
 730             return kCFStringEncodingInsufficientOutputBufferLength;
 731         } else {
 732             return kCFStringEncodingInvalidInputStream;
 733         }
 734     } else {
 735         const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 736         UInt32 usedLen = 0;
 737         UInt32 localUsedByteLen;
 738         UInt32 theUsedByteLen = 0;
 739         UInt32 theResult = kCFStringEncodingConversionSuccess;
 740         CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
 741         CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
 742
 743         if (!converter) return kCFStringEncodingConverterUnavailable;
 744
 745         if (flags & kCFStringEncodingSubstituteCombinings) {
 746             if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->isValidCombiningChar;
 747        } else {
 748             isValidCombiningChar = converter->isValidCombiningChar;
 749             if (!(flags & kCFStringEncodingIgnoreCombinings)) {
 750                 toBytesPrecompose = converter->toBytesPrecompose;
 751                 flags |= kCFStringEncodingComposeCombinings;
 752             }
 753         }
 754
 755
 756         while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
 757             if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
 758                 UInt32 dummy;
 759
 760                 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
 761                     if (toBytesPrecompose) {
 762                         UInt32 localUsedLen = usedLen;
 763
 764                         while (isValidCombiningChar(characters[--usedLen]));
 765                         theUsedByteLen += localUsedByteLen;
 766                         if (converter->maxLen > 1) {
 767                             TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
 768                             theUsedByteLen -= localUsedByteLen;
 769                         } else {
 770                             theUsedByteLen--;
 771                         }
 772                         if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
 773                             usedLen += localUsedLen;
 774                             if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
 775                                 theUsedByteLen += localUsedByteLen;
 776                                 theResult = kCFStringEncodingInvalidInputStream;
 777                                 break;
 778                             }
 779                         } else if (flags & kCFStringEncodingAllowLossyConversion) {
 780                             uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 781
 782                             if (lossyByte) {
 783                                                                 while (isValidCombiningChar(characters[++usedLen]));
 784                                 localUsedByteLen = 1;
 785                                 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 786                             } else {
 787                                 ++usedLen;
 788                                 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 789                             }
 790                         } else {
 791                             theResult = kCFStringEncodingInvalidInputStream;
 792                             break;
 793                         }
 794                     } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
 795                                     theUsedByteLen += localUsedByteLen;
 796                                     theResult = kCFStringEncodingInsufficientOutputBufferLength;
 797                                     break;
 798                     } else if (flags & kCFStringEncodingIgnoreCombinings) {
 799                         while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
 800                     } else {
 801                         uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 802
 803                         theUsedByteLen += localUsedByteLen;
 804                         if (lossyByte) {
 805                             ++usedLen;
 806                             localUsedByteLen = 1;
 807                             if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 808                         } else {
 809                             usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 810                         }
 811                     }
 812                 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
 813                     theUsedByteLen += localUsedByteLen;
 814
 815                     if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
 816                         UInt32 localUsedLen;
 817
 818                         localUsedByteLen = 0;
 819                         while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
 820                     }
 821                     if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
 822                     break;
 823                 } else if (flags & kCFStringEncodingAllowLossyConversion) {
 824                     uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 825
 826                     theUsedByteLen += localUsedByteLen;
 827                     if (lossyByte) {
 828                         ++usedLen;
 829                         localUsedByteLen = 1;
 830                         if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 831                     } else {
 832                         usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 833                     }
 834                 } else {
 835                     theUsedByteLen += localUsedByteLen;
 836                     theResult = kCFStringEncodingInvalidInputStream;
 837                     break;
 838                 }
 839             }
 840             theUsedByteLen += localUsedByteLen;
 841         }
 842
 843         if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
 844             if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
 845                 UInt32 localUsedLen;
 846
 847                 localUsedByteLen = 0;
 848                 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
 849             }
 850             if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
 851         }
 852         if (usedByteLen) *usedByteLen = theUsedByteLen;
 853         if (usedCharLen) *usedCharLen = usedLen;
 854
 855         return theResult;
 856     }
 857 }
 858
 859 UInt32 CFStringEncodingBytesToUnicode(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UInt32 *usedByteLen, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 860     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 861     UInt32 usedLen = 0;
 862     UInt32 theUsedCharLen = 0;
 863     UInt32 localUsedCharLen;
 864     UInt32 theResult = kCFStringEncodingConversionSuccess;
 865
 866     if (!converter) return kCFStringEncodingConverterUnavailable;
 867
 868
 869     while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 870         if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
 871             UInt32 tempUsedCharLen;
 872
 873             if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || ((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
 874                 theUsedCharLen += localUsedCharLen;
 875                 theResult = kCFStringEncodingInsufficientOutputBufferLength;
 876                 break;
 877             } else if (flags & kCFStringEncodingAllowLossyConversion) {
 878                 theUsedCharLen += localUsedCharLen;
 879                 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
 880             } else {
 881                 theUsedCharLen += localUsedCharLen;
 882                 theResult = kCFStringEncodingInvalidInputStream;
 883                 break;
 884             }
 885         }
 886         theUsedCharLen += localUsedCharLen;
 887     }
 888
 889     if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
 890         theResult = kCFStringEncodingInsufficientOutputBufferLength;
 891     }
 892     if (usedCharLen) *usedCharLen = theUsedCharLen;
 893     if (usedByteLen) *usedByteLen = usedLen;
 894
 895     return theResult;
 896 }
 897
 898 __private_extern__ Boolean CFStringEncodingIsValidEncoding(UInt32 encoding) {
 899     return (CFStringEncodingGetConverter(encoding) ? true : false);
 900 }
 901
 902 __private_extern__ const char *CFStringEncodingName(UInt32 encoding) {
 903     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
 904     if (entry) return entry->encodingName;
 905     return NULL;
 906 }
 907
 908 __private_extern__ const char **CFStringEncodingCanonicalCharsetNames(UInt32 encoding) {
 909     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
 910     if (entry) return entry->ianaNames;
 911     return NULL;
 912 }
 913
 914 __private_extern__ UInt32 CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding) {
 915     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
 916
 917     return (entry ? entry->scriptCode : ((encoding & 0x0FFF) == kCFStringEncodingUnicode ? kCFStringEncodingUnicode : (encoding < 0xFF ? encoding : kCFStringEncodingInvalidId)));
 918 }
 919
 920 __private_extern__ UInt32 CFStringEncodingCharLengthForBytes(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes) {
 921     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 922
 923     if (converter) {
 924         UInt32 switchVal = (UInt32)(converter->toUnicodeLen);
 925
 926             if (switchVal < 0xFFFF)
 927             return switchVal * numBytes;
 928         else
 929             return converter->toUnicodeLen(flags, bytes, numBytes);
 930     }
 931
 932     return 0;
 933 }
 934
 935 __private_extern__ UInt32 CFStringEncodingByteLengthForCharacters(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars) {
 936     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 937
 938     if (converter) {
 939         UInt32 switchVal = (UInt32)(converter->toBytesLen);
 940
 941             if (switchVal < 0xFFFF)
 942             return switchVal * numChars;
 943         else
 944             return converter->toBytesLen(flags, characters, numChars);
 945     }
 946
 947     return 0;
 948 }
 949
 950 __private_extern__ void CFStringEncodingRegisterFallbackProcedures(UInt32 encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
 951     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
 952
 953     if (entry && __CFGetConverter(encoding)) {
 954         ((_CFEncodingConverter*)entry->converter)->toBytesFallback = (toBytes ? toBytes : entry->toBytesFallback);
 955         ((_CFEncodingConverter*)entry->converter)->toUnicodeFallback = (toUnicode ? toUnicode : entry->toUnicodeFallback);
 956     }
 957 }
 958
 959 __private_extern__ const CFStringEncodingConverter *CFStringEncodingGetConverter(UInt32 encoding) {
 960     return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding));
 961 }
 962
 963 static const UInt32 __CFBuiltinEncodings[] = {
 964     kCFStringEncodingMacRoman,
 965     kCFStringEncodingWindowsLatin1,
 966     kCFStringEncodingISOLatin1,
 967     kCFStringEncodingNextStepLatin,
 968     kCFStringEncodingASCII,
 969     kCFStringEncodingUTF8,
 970     /* These seven are available only in CFString-level */
 971     kCFStringEncodingNonLossyASCII,
 972
 973     kCFStringEncodingUTF16,
 974     kCFStringEncodingUTF16BE,
 975     kCFStringEncodingUTF16LE,
 976
 977     kCFStringEncodingUTF32,
 978     kCFStringEncodingUTF32BE,
 979     kCFStringEncodingUTF32LE,
 980
 981     kCFStringEncodingInvalidId,
 982 };
 983
 984
 985 __private_extern__ const UInt32 *CFStringEncodingListOfAvailableEncodings(void) {
 986     return __CFBuiltinEncodings;
 987 }
 988