StringEncodings.subproj/CFStringEncodingConverter.c

   1 /*
   2  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /*      CFStringEncodingConverter.c
  26         Copyright 1998-2002, Apple, Inc. All rights reserved.
  27         Responsibility: Aki Inoue
  28 */
  29
  30 #include "CFInternal.h"
  31 #include <CoreFoundation/CFArray.h>
  32 #include <CoreFoundation/CFDictionary.h>
  33 #include "CFUniChar.h"
  34 #include "CFUtilities.h"
  35 #include "CFUnicodeDecomposition.h"
  36 #include "CFStringEncodingConverterExt.h"
  37 #include "CFStringEncodingConverterPriv.h"
  38 #include <stdlib.h>
  39 #if !defined(__MACOS8__)
  40 #ifdef __WIN32__
  41 #include <windows.h>
  42 #else // Mach, HP-UX, Solaris
  43 #include <pthread.h>
  44 #endif
  45 #endif __MACOS8__
  46
  47
  48 /* Macros
  49 */
  50 #define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->_toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->toBytes)(flags,chars,numChars,bytes,max,used))
  51 #define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->_toUnicode ?  (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->toUnicode)(flags,bytes,numBytes,chars,max,used))
  52
  53 #define LineSeparator 0x2028
  54 #define ParagraphSeparator 0x2029
  55 #define ASCIINewLine 0x0a
  56 #define kSurrogateHighStart 0xD800
  57 #define kSurrogateHighEnd 0xDBFF
  58 #define kSurrogateLowStart 0xDC00
  59 #define kSurrogateLowEnd 0xDFFF
  60
  61 /* Mapping 128..255 to lossy ASCII
  62 */
  63 static const struct {
  64     unsigned char chars[4];
  65 } _toLossyASCIITable[] = {
  66     {{' ', 0, 0, 0}}, // NO-BREAK SPACE
  67     {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
  68     {{'c', 0, 0, 0}}, // CENT SIGN
  69     {{'L', 0, 0, 0}}, // POUND SIGN
  70     {{'$', 0, 0, 0}}, // CURRENCY SIGN
  71     {{'Y', 0, 0, 0}}, // YEN SIGN
  72     {{'|', 0, 0, 0}}, // BROKEN BAR
  73     {{0, 0, 0, 0}}, // SECTION SIGN
  74     {{0, 0, 0, 0}}, // DIAERESIS
  75     {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
  76     {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
  77     {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
  78     {{0, 0, 0, 0}}, // NOT SIGN
  79     {{'-', 0, 0, 0}}, // SOFT HYPHEN
  80     {{'(', 'R', ')', 0}}, // REGISTERED SIGN
  81     {{0, 0, 0, 0}}, // MACRON
  82     {{0, 0, 0, 0}}, // DEGREE SIGN
  83     {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
  84     {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
  85     {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
  86     {{0, 0, 0, 0}}, // ACUTE ACCENT
  87     {{0, 0, 0, 0}}, // MICRO SIGN
  88     {{0, 0, 0, 0}}, // PILCROW SIGN
  89     {{0, 0, 0, 0}}, // MIDDLE DOT
  90     {{0, 0, 0, 0}}, // CEDILLA
  91     {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
  92     {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
  93     {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
  94     {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
  95     {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
  96     {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
  97     {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
  98     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
  99     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
 100     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 101     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
 102     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
 103     {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
 104     {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
 105     {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
 106     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
 107     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
 108     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 109     {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
 110     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
 111     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
 112     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 113     {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
 114     {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
 115     {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
 116     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
 117     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
 118     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 119     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
 120     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
 121     {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
 122     {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
 123     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
 124     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
 125     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 126     {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
 127     {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
 128     {{'t', 'h', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
 129     {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
 130     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
 131     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
 132     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
 133     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
 134     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
 135     {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
 136     {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
 137     {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
 138     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
 139     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
 140     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
 141     {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
 142     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
 143     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
 144     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
 145     {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
 146     {{'T', 'H', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
 147     {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
 148     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
 149     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
 150     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
 151     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
 152     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
 153     {{'/', 0, 0, 0}}, // DIVISION SIGN
 154     {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
 155     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
 156     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
 157     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
 158     {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
 159     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
 160     {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
 161     {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
 162 };
 163
 164 CF_INLINE UInt32 __CFToASCIILatin1Fallback(UniChar character, UInt8 *bytes, UInt32 maxByteLen) {
 165     const char *losChars = (const unsigned char*)_toLossyASCIITable + (character - 0xA0) * sizeof(unsigned char[4]);
 166     unsigned int numBytes = 0;
 167     int idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
 168
 169     for (idx = 0;idx < max;idx++) {
 170         if (losChars[idx]) {
 171             if (maxByteLen) bytes[idx] = losChars[idx];
 172             ++numBytes;
 173         } else {
 174             break;
 175         }
 176     }
 177
 178     return numBytes;
 179 }
 180
 181 static UInt32 __CFDefaultToBytesFallbackProc(const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 182     if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
 183         if (maxByteLen) *bytes = (UInt8)(*characters - 0x80);
 184         *usedByteLen = 1;
 185         return 1;
 186     } else if (*characters < 0x100) {
 187         *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
 188         return 1;
 189     } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
 190         if (maxByteLen) *bytes = '?';
 191         *usedByteLen = 1;
 192         return (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
 193     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
 194         if (maxByteLen) *bytes = ' ';
 195         *usedByteLen = 1;
 196         return 1;
 197     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
 198         if (maxByteLen) *bytes = ASCIINewLine;
 199         *usedByteLen = 1;
 200         return 1;
 201     } else if (!CFUniCharIsMemberOf(*characters, kCFUniCharLetterCharacterSet)) {
 202         *usedByteLen = 0;
 203         return 1;
 204     } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
 205         UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
 206
 207         (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
 208         if (*decomposed < 0x80) {
 209             if (maxByteLen) *bytes = (UInt8)(*decomposed);
 210             *usedByteLen = 1;
 211             return 1;
 212         } else {
 213             UTF16Char theChar = *decomposed;
 214
 215             return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
 216         }
 217     } else {
 218         if (maxByteLen) *bytes = '?';
 219         *usedByteLen = 1;
 220         return 1;
 221     }
 222 }
 223
 224 static UInt32 __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 225     if (maxCharLen) *characters = (UniChar)'?';
 226     *usedCharLen = 1;
 227     return 1;
 228 }
 229
 230 #define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
 231 #define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))
 232
 233 #define EXTRA_BASE (0x0F00)
 234
 235 /* Wrapper funcs for non-standard converters
 236 */
 237 static UInt32 __CFToBytesCheapEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 238     UInt32 processedCharLen = 0;
 239     UInt32 length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
 240     uint8_t byte;
 241
 242     while (processedCharLen < length) {
 243         if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], &byte)) break;
 244
 245         if (maxByteLen) bytes[processedCharLen] = byte;
 246         processedCharLen++;
 247     }
 248
 249     *usedByteLen = processedCharLen;
 250     return processedCharLen;
 251 }
 252
 253 static UInt32 __CFToUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 254     UInt32 processedByteLen = 0;
 255     UInt32 length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
 256     UniChar character;
 257
 258     while (processedByteLen < length) {
 259         if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
 260
 261         if (maxCharLen) characters[processedByteLen] = character;
 262         processedByteLen++;
 263     }
 264
 265     *usedCharLen = processedByteLen;
 266     return processedByteLen;
 267 }
 268
 269 static UInt32 __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 270     UInt32 processedByteLen = 0;
 271     UInt32 theUsedCharLen = 0;
 272     UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
 273     UInt32 usedLen;
 274     UniChar character;
 275     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 276
 277     while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 278         if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
 279
 280         if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
 281             uint32_t idx;
 282
 283             usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
 284             *usedCharLen = theUsedCharLen;
 285
 286             for (idx = 0;idx < usedLen;idx++) {
 287                 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
 288                     if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 289                     theUsedCharLen += 2;
 290                     if (maxCharLen) {
 291                         charBuffer[idx] = charBuffer[idx] - 0x10000;
 292                         *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
 293                         *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
 294                     }
 295                 } else {
 296                     if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 297                     ++theUsedCharLen;
 298                     *(characters++) = charBuffer[idx];
 299                 }
 300             }
 301         } else {
 302             if (maxCharLen) *(characters++) = character;
 303             ++theUsedCharLen;
 304         }
 305         processedByteLen++;
 306     }
 307
 308     *usedCharLen = theUsedCharLen;
 309     return processedByteLen;
 310 }
 311
 312 static UInt32 __CFToBytesStandardEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 313     UInt32 processedCharLen = 0;
 314     uint8_t byte;
 315     UInt32 usedLen;
 316
 317     *usedByteLen = 0;
 318
 319     while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
 320         if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters, numChars, &byte))) break;
 321
 322         if (maxByteLen) bytes[*usedByteLen] = byte;
 323         (*usedByteLen)++;
 324         characters += usedLen;
 325         numChars -= usedLen;
 326         processedCharLen += usedLen;
 327     }
 328
 329     return processedCharLen;
 330 }
 331
 332 static UInt32 __CFToUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 333     UInt32 processedByteLen = 0;
 334 #if defined(__MACOS8__) || defined(__WIN32__)
 335     UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
 336 #else
 337     UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
 338 #endif
 339     UInt32 usedLen;
 340
 341     *usedCharLen = 0;
 342
 343     while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
 344         if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
 345
 346         if (maxCharLen) {
 347             uint16_t idx;
 348
 349             if (*usedCharLen + usedLen > maxCharLen) break;
 350
 351             for (idx = 0;idx < usedLen;idx++) {
 352                 characters[*usedCharLen + idx] = charBuffer[idx];
 353             }
 354         }
 355         *usedCharLen += usedLen;
 356         processedByteLen++;
 357     }
 358
 359     return processedByteLen;
 360 }
 361
 362 static UInt32 __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 363     UInt32 processedByteLen = 0;
 364 #if defined(__MACOS8__) || defined(__WIN32__)
 365     UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
 366 #else
 367     UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
 368 #endif
 369     UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
 370     UInt32 usedLen;
 371     UInt32 decompedLen;
 372     UInt32 idx, decompIndex;
 373     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 374     UInt32 theUsedCharLen = 0;
 375
 376     while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 377         if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
 378
 379         for (idx = 0;idx < usedLen;idx++) {
 380             if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
 381                 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
 382                 *usedCharLen = theUsedCharLen;
 383
 384                 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
 385                     if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
 386                         if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 387                         theUsedCharLen += 2;
 388                         if (maxCharLen) {
 389                             charBuffer[idx] = charBuffer[idx] - 0x10000;
 390                             *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
 391                             *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
 392                         }
 393                     } else {
 394                         if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 395                         ++theUsedCharLen;
 396                         *(characters++) = charBuffer[idx];
 397                     }
 398                 }
 399             } else {
 400                 if (maxCharLen) *(characters++) = charBuffer[idx];
 401                 ++theUsedCharLen;
 402             }
 403         }
 404         processedByteLen++;
 405     }
 406
 407     *usedCharLen = theUsedCharLen;
 408     return processedByteLen;
 409 }
 410
 411 static UInt32 __CFToBytesCheapMultiByteWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
 412     UInt32 processedCharLen = 0;
 413 #if defined(__MACOS8__) || defined(__WIN32__)
 414     uint8_t byteBuffer[20]; // Dynamic stack allocation is GNU specific
 415 #else
 416     uint8_t byteBuffer[((const _CFEncodingConverter*)converter)->maxLen];
 417 #endif
 418     UInt32 usedLen;
 419
 420     *usedByteLen = 0;
 421
 422     while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
 423         if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
 424
 425         if (maxByteLen) {
 426             uint16_t idx;
 427
 428             if (*usedByteLen + usedLen > maxByteLen) break;
 429
 430             for (idx = 0;idx <usedLen;idx++) {
 431                 bytes[*usedByteLen + idx] = byteBuffer[idx];
 432             }
 433         }
 434
 435         *usedByteLen += usedLen;
 436         processedCharLen++;
 437     }
 438
 439     return processedCharLen;
 440 }
 441
 442 static UInt32 __CFToUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 443     UInt32 processedByteLen = 0;
 444     UniChar character;
 445     UInt32 usedLen;
 446
 447     *usedCharLen = 0;
 448
 449     while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
 450         if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
 451
 452         if (maxCharLen) *(characters++) = character;
 453         (*usedCharLen)++;
 454         processedByteLen += usedLen;
 455         bytes += usedLen;
 456         numBytes -= usedLen;
 457     }
 458
 459     return processedByteLen;
 460 }
 461
 462 static UInt32 __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
 463     UInt32 processedByteLen = 0;
 464     UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
 465     UniChar character;
 466     UInt32 usedLen;
 467     UInt32 decomposedLen;
 468     UInt32 theUsedCharLen = 0;
 469     bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
 470
 471     while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
 472         if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
 473
 474         if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
 475             uint32_t idx;
 476
 477             decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
 478             *usedCharLen = theUsedCharLen;
 479
 480             for (idx = 0;idx < decomposedLen;idx++) {
 481                 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
 482                     if (theUsedCharLen + 2 > maxCharLen)  return processedByteLen;
 483                     theUsedCharLen += 2;
 484                     if (maxCharLen) {
 485                         charBuffer[idx] = charBuffer[idx] - 0x10000;
 486                         *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
 487                         *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
 488                     }
 489                 } else {
 490                     if (theUsedCharLen + 1 > maxCharLen)  return processedByteLen;
 491                     ++theUsedCharLen;
 492                     *(characters++) = charBuffer[idx];
 493                 }
 494             }
 495         } else {
 496             if (maxCharLen) *(characters++) = character;
 497             ++theUsedCharLen;
 498         }
 499
 500         processedByteLen += usedLen;
 501         bytes += usedLen;
 502         numBytes -= usedLen;
 503     }
 504     *usedCharLen = theUsedCharLen;
 505     return processedByteLen;
 506 }
 507
 508 /* static functions
 509 */
 510 static _CFConverterEntry __CFConverterEntryASCII = {
 511     kCFStringEncodingASCII, NULL,
 512     "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL}, NULL, NULL, NULL, NULL,
 513     kCFStringEncodingMacRoman // We use string encoding's script range here
 514 };
 515
 516 static _CFConverterEntry __CFConverterEntryISOLatin1 = {
 517     kCFStringEncodingISOLatin1, NULL,
 518     "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL}, NULL, NULL, NULL, NULL,
 519     kCFStringEncodingMacRoman // We use string encoding's script range here
 520 };
 521
 522 static _CFConverterEntry __CFConverterEntryMacRoman = {
 523     kCFStringEncodingMacRoman, NULL,
 524     "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL}, NULL, NULL, NULL, NULL,
 525     kCFStringEncodingMacRoman // We use string encoding's script range here
 526 };
 527
 528 static _CFConverterEntry __CFConverterEntryWinLatin1 = {
 529     kCFStringEncodingWindowsLatin1, NULL,
 530     "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL}, NULL, NULL, NULL, NULL,
 531     kCFStringEncodingMacRoman // We use string encoding's script range here
 532 };
 533
 534 static _CFConverterEntry __CFConverterEntryNextStepLatin = {
 535     kCFStringEncodingNextStepLatin, NULL,
 536     "Western (NextStep)", {"x-nextstep", NULL, NULL, NULL}, NULL, NULL, NULL, NULL,
 537     kCFStringEncodingMacRoman // We use string encoding's script range here
 538 };
 539
 540 static _CFConverterEntry __CFConverterEntryUTF8 = {
 541     kCFStringEncodingUTF8, NULL,
 542     "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL, NULL}, NULL, NULL, NULL, NULL,
 543     kCFStringEncodingUnicode // We use string encoding's script range here
 544 };
 545
 546 CF_INLINE _CFConverterEntry *__CFStringEncodingConverterGetEntry(UInt32 encoding) {
 547     switch (encoding) {
 548         case kCFStringEncodingInvalidId:
 549         case kCFStringEncodingASCII:
 550             return &__CFConverterEntryASCII;
 551
 552         case kCFStringEncodingISOLatin1:
 553             return &__CFConverterEntryISOLatin1;
 554
 555         case kCFStringEncodingMacRoman:
 556             return &__CFConverterEntryMacRoman;
 557
 558         case kCFStringEncodingWindowsLatin1:
 559             return &__CFConverterEntryWinLatin1;
 560
 561         case kCFStringEncodingNextStepLatin:
 562             return &__CFConverterEntryNextStepLatin;
 563
 564         case kCFStringEncodingUTF8:
 565             return &__CFConverterEntryUTF8;
 566
 567         default: return NULL;
 568     }
 569 }
 570
 571 CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition) {
 572 #define NUM_OF_ENTRIES_CYCLE (10)
 573     static CFSpinLock_t _indexLock = 0;
 574     static UInt32 _currentIndex = 0;
 575     static UInt32 _allocatedSize = 0;
 576     static _CFEncodingConverter *_allocatedEntries = NULL;
 577     _CFEncodingConverter *converter;
 578
 579
 580     __CFSpinLock(&_indexLock);
 581     if ((_currentIndex + 1) >= _allocatedSize) {
 582         _currentIndex = 0;
 583         _allocatedSize = 0;
 584         _allocatedEntries = NULL;
 585     }
 586     if (_allocatedEntries == NULL) { // Not allocated yet
 587         _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(NULL, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
 588         _allocatedSize = NUM_OF_ENTRIES_CYCLE;
 589         converter = &(_allocatedEntries[_currentIndex]);
 590     } else {
 591         converter = &(_allocatedEntries[++_currentIndex]);
 592     }
 593     __CFSpinUnlock(&_indexLock);
 594
 595     switch (definition->encodingClass) {
 596         case kCFStringEncodingConverterStandard:
 597             converter->toBytes = definition->toBytes;
 598             converter->toUnicode = definition->toUnicode;
 599             converter->toCanonicalUnicode = definition->toUnicode;
 600             converter->_toBytes = NULL;
 601             converter->_toUnicode = NULL;
 602             converter->maxLen = 2;
 603             break;
 604
 605         case kCFStringEncodingConverterCheapEightBit:
 606             converter->toBytes = __CFToBytesCheapEightBitWrapper;
 607             converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
 608             converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
 609             converter->_toBytes = definition->toBytes;
 610             converter->_toUnicode = definition->toUnicode;
 611             converter->maxLen = 1;
 612             break;
 613
 614         case kCFStringEncodingConverterStandardEightBit:
 615             converter->toBytes = __CFToBytesStandardEightBitWrapper;
 616             converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
 617             converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
 618             converter->_toBytes = definition->toBytes;
 619             converter->_toUnicode = definition->toUnicode;
 620             converter->maxLen = definition->maxDecomposedCharLen;
 621             break;
 622
 623         case kCFStringEncodingConverterCheapMultiByte:
 624             converter->toBytes = __CFToBytesCheapMultiByteWrapper;
 625             converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
 626             converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
 627             converter->_toBytes = definition->toBytes;
 628             converter->_toUnicode = definition->toUnicode;
 629             converter->maxLen = definition->maxBytesPerChar;
 630             break;
 631
 632         case kCFStringEncodingConverterPlatformSpecific:
 633             converter->toBytes = NULL;
 634             converter->toUnicode = NULL;
 635             converter->toCanonicalUnicode = NULL;
 636             converter->_toBytes = NULL;
 637             converter->_toUnicode = NULL;
 638             converter->maxLen = 0;
 639             converter->toBytesLen = NULL;
 640             converter->toUnicodeLen = NULL;
 641             converter->toBytesFallback = NULL;
 642             converter->toUnicodeFallback = NULL;
 643             converter->toBytesPrecompose = NULL;
 644             converter->isValidCombiningChar = NULL;
 645             return converter;
 646
 647         default: // Shouln't be here
 648             return NULL;
 649     }
 650
 651     converter->toBytesLen = (definition->toBytesLen ? definition->toBytesLen : (CFStringEncodingToBytesLenProc)(UInt32)definition->maxBytesPerChar);
 652     converter->toUnicodeLen = (definition->toUnicodeLen ? definition->toUnicodeLen : (CFStringEncodingToUnicodeLenProc)(UInt32)definition->maxDecomposedCharLen);
 653     converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
 654     converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
 655     converter->toBytesPrecompose = (definition->toBytesPrecompose ? definition->toBytesPrecompose : NULL);
 656     converter->isValidCombiningChar = (definition->isValidCombiningChar ? definition->isValidCombiningChar : NULL);
 657
 658     return converter;
 659 }
 660
 661 CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(_CFConverterEntry *entry) {
         /* Pick the statically defined converter that matches entry->encoding.
          * The result is NULL when entry is NULL or when its encoding is not one
          * of the six encodings listed in the switch below.
          */
 662     const CFStringEncodingConverter *definition = NULL;
 663
 664     if (entry) {
 665         switch (entry->encoding) {
 666             case kCFStringEncodingASCII:         definition = &__CFConverterASCII;         break;
 667             case kCFStringEncodingISOLatin1:     definition = &__CFConverterISOLatin1;     break;
 668             case kCFStringEncodingMacRoman:      definition = &__CFConverterMacRoman;      break;
 669             case kCFStringEncodingWindowsLatin1: definition = &__CFConverterWinLatin1;     break;
 670             case kCFStringEncodingNextStepLatin: definition = &__CFConverterNextStepLatin; break;
 671             case kCFStringEncodingUTF8:          definition = &__CFConverterUTF8;          break;
 672             default:                             break; // no built-in definition: stays NULL
 673         }
 674     }
 675
 676     return definition;
 677 }
 687
 688 static const _CFEncodingConverter *__CFGetConverter(UInt32 encoding) {
         /* Return the runtime converter stored on the table entry for `encoding`.
          * While entry->converter is unset, it is built from the built-in definition
          * (when one exists) and the definition's own fallback procedures are
          * recorded on the entry. Returns NULL when there is no entry; when the
          * entry has no built-in definition the still-empty entry->converter is
          * returned.
          */
 689     _CFConverterEntry *slot = __CFStringEncodingConverterGetEntry(encoding);
 690     const CFStringEncodingConverter *definition;
 691
 692     if (!slot) return NULL;
 693     if (slot->converter) return (_CFEncodingConverter *)slot->converter; // already built
 694
 695     definition = __CFStringEncodingConverterGetDefinition(slot);
 696     if (definition) {
 697         slot->converter = __CFEncodingConverterFromDefinition(definition);
 698         slot->toBytesFallback = definition->toBytesFallback;
 699         slot->toUnicodeFallback = definition->toUnicodeFallback;
 700     }
 701
 702     return (_CFEncodingConverter *)slot->converter;
 703 }
 705
 706 /* Public API
 707 */
 708 UInt32 CFStringEncodingUnicodeToBytes(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars, UInt32 *usedCharLen, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
         /* Convert `numChars` UniChars from `characters` into `encoding`, storing the
          * result in `bytes`.
          *
          * flags        kCFStringEncoding* option bits tested below: canonical / HFS+
          *              decomposition for UTF-8; combining-character and lossy-conversion
          *              options for every other encoding.
          * usedCharLen  optional out: number of UniChars consumed.
          * usedByteLen  optional out: number of bytes accounted for.
          * maxByteLen   capacity of `bytes`. A value of 0 lifts the output limit in the
          *              generic loop and suppresses the direct lossy-byte stores
          *              (presumably a length-only query -- confirm against callers).
          *
          * Returns kCFStringEncodingConversionSuccess,
          * kCFStringEncodingInsufficientOutputBufferLength,
          * kCFStringEncodingInvalidInputStream, or
          * kCFStringEncodingConverterUnavailable when no converter can be obtained.
          *
          * Both combining-mark scans in the precompose path are bounds-checked so they
          * never index outside characters[0 .. numChars-1].
          */
 709     if (encoding == kCFStringEncodingUTF8) {
             // UTF-8 bypasses the converter table entirely.
 710         static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
 711         uint32_t convertedCharLen;
 712         uint32_t usedLen;
 713
 714
 715         if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
                 // Decompose straight into UTF-8; the last argument selects the HFS+ variant.
 716             (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
 717         } else {
                 // Look up the UTF-8 toBytes procedure once and keep it in the static.
 718             if (!__CFToUTF8) {
 719                 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 720                 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
 721             }
 722             convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, (UInt32 *)&usedLen);
 723         }
 724         if (usedCharLen) *usedCharLen = convertedCharLen;
 725         if (usedByteLen) *usedByteLen = usedLen;
 726
             // Everything consumed -> success; output exactly full -> buffer too small;
             // anything else is reported as invalid input.
 727         if (convertedCharLen == numChars) {
 728             return kCFStringEncodingConversionSuccess;
 729         } else if (maxByteLen && (maxByteLen == usedLen)) {
 730             return kCFStringEncodingInsufficientOutputBufferLength;
 731         } else {
 732             return kCFStringEncodingInvalidInputStream;
 733         }
 734     } else {
 735         const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 736         UInt32 usedLen = 0;             // UniChars consumed so far
 737         UInt32 localUsedByteLen;        // bytes reported by the most recent conversion step
 738         UInt32 theUsedByteLen = 0;      // running total of output bytes
 739         UInt32 theResult = kCFStringEncodingConversionSuccess;
 740         CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
 741         CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
 742
 743         if (!converter) return kCFStringEncodingConverterUnavailable;
 744
             // Decide which combining-character helpers take part in this call.
 745         if (flags & kCFStringEncodingSubstituteCombinings) {
 746             if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->isValidCombiningChar;
 747         } else {
 748             isValidCombiningChar = converter->isValidCombiningChar;
 749             if (!(flags & kCFStringEncodingIgnoreCombinings)) {
 750                 toBytesPrecompose = converter->toBytesPrecompose;
 751                 flags |= kCFStringEncodingComposeCombinings;
 752             }
 753         }
 754
 755
 756         while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
                 // Convert as much as possible; a short count means the converter stopped early.
 757             if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
 758                 UInt32 dummy;
 759
 760                 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
                         // Stopped on a combining mark that follows already converted text.
 761                     if (toBytesPrecompose) {
 762                         UInt32 localUsedLen = usedLen;
 763
                             // Back up to the character the marks attach to; the index guard
                             // keeps the scan from wrapping below characters[0].
 764                         while ((usedLen > 0) && isValidCombiningChar(characters[--usedLen]));
 765                         theUsedByteLen += localUsedByteLen;
                             // Take the bytes of the backed-up span out of the total again:
                             // re-measured for multibyte converters, one byte otherwise.
 766                         if (converter->maxLen > 1) {
 767                             TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
 768                             theUsedByteLen -= localUsedByteLen;
 769                         } else {
 770                             theUsedByteLen--;
 771                         }
 772                         if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
 773                             usedLen += localUsedLen;
 774                             if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
 775                                 theUsedByteLen += localUsedByteLen;
 776                                 theResult = kCFStringEncodingInvalidInputStream;
 777                                 break;
 778                             }
 779                         } else if (flags & kCFStringEncodingAllowLossyConversion) {
 780                             uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 781
 782                             if (lossyByte) {
                                     // Replace the base character and its marks with one lossy byte.
                                     // Bounded by numChars, like the IgnoreCombinings scan below, so a
                                     // run of marks reaching the end of input is not read past.
 783                                 while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
 784                                 localUsedByteLen = 1;
 785                                 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 786                             } else {
 787                                 ++usedLen;
 788                                 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 789                             }
 790                         } else {
 791                             theResult = kCFStringEncodingInvalidInputStream;
 792                             break;
 793                         }
 794                     } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
 795                         theUsedByteLen += localUsedByteLen;
 796                         theResult = kCFStringEncodingInsufficientOutputBufferLength;
 797                         break;
 798                     } else if (flags & kCFStringEncodingIgnoreCombinings) {
                             // Skip the whole run of combining marks without emitting anything for it.
 799                         while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
 800                     } else {
 801                         uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 802
 803                         theUsedByteLen += localUsedByteLen;
 804                         if (lossyByte) {
 805                             ++usedLen;
 806                             localUsedByteLen = 1;
 807                             if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 808                         } else {
 809                             usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 810                         }
 811                     }
 812                 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
 813                     theUsedByteLen += localUsedByteLen;
 814
                         // Lossy mode without an explicit lossy byte: keep consuming characters
                         // for as long as their fallback reports zero output bytes.
 815                     if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
 816                         UInt32 localUsedLen;
 817
 818                         localUsedByteLen = 0;
 819                         while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
 820                     }
 821                     if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
 822                     break;
 823                 } else if (flags & kCFStringEncodingAllowLossyConversion) {
                         // Unconvertible character: emit the lossy byte, or defer to the fallback.
 824                     uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
 825
 826                     theUsedByteLen += localUsedByteLen;
 827                     if (lossyByte) {
 828                         ++usedLen;
 829                         localUsedByteLen = 1;
 830                         if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
 831                     } else {
 832                         usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
 833                     }
 834                 } else {
 835                     theUsedByteLen += localUsedByteLen;
 836                     theResult = kCFStringEncodingInvalidInputStream;
 837                     break;
 838                 }
 839             }
                 // Fold in the bytes reported by the last step of this iteration.
 840             theUsedByteLen += localUsedByteLen;
 841         }
 842
             // The loop ended with input left over, a bounded buffer and no error yet.
 843         if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
 844             if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
 845                 UInt32 localUsedLen;
 846
 847                 localUsedByteLen = 0;
 848                 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
 849             }
 850             if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
 851         }
 852         if (usedByteLen) *usedByteLen = theUsedByteLen;
 853         if (usedCharLen) *usedCharLen = usedLen;
 854
 855         return theResult;
 856     }
 857 }
 858
 859 UInt32 CFStringEncodingBytesToUnicode(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UInt32 *usedByteLen, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
         /* Decode `numBytes` bytes of `encoding` from `bytes` into UniChars stored in
          * `characters`.
          *
          * usedByteLen  optional out: number of input bytes consumed.
          * usedCharLen  optional out: number of UniChars accounted for.
          * maxCharLen   capacity of `characters`. A value of 0 lifts the output limit
          *              and hands a capacity of 0 to the converter (presumably a
          *              length-only query -- confirm against callers).
          *
          * Returns kCFStringEncodingConversionSuccess,
          * kCFStringEncodingInsufficientOutputBufferLength,
          * kCFStringEncodingInvalidInputStream, or
          * kCFStringEncodingConverterUnavailable when no converter exists for `encoding`.
          */
 860     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 861     UInt32 usedLen = 0;             // input bytes consumed so far
 862     UInt32 theUsedCharLen = 0;      // running total of output UniChars
 863     UInt32 localUsedCharLen;        // UniChars reported by the most recent step
 864     UInt32 theResult = kCFStringEncodingConversionSuccess;
 865
 866     if (!converter) return kCFStringEncodingConverterUnavailable;
 867
 868
 869     while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
             // Convert as much as possible; a short count means the converter stopped early.
 870         if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
 871             UInt32 tempUsedCharLen;
 872
                 // Output is exactly full, or (canonical modes only) a probe with no output
                 // buffer shows the converter could still consume the remaining input.
 873             if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || ((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
 874                 theUsedCharLen += localUsedCharLen;
 875                 theResult = kCFStringEncodingInsufficientOutputBufferLength;
 876                 break;
 877             } else if (flags & kCFStringEncodingAllowLossyConversion) {
                     // Let the fallback procedure deal with the bytes the converter rejected.
 878                 theUsedCharLen += localUsedCharLen;
 879                 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
 880             } else {
 881                 theUsedCharLen += localUsedCharLen;
 882                 theResult = kCFStringEncodingInvalidInputStream;
 883                 break;
 884             }
 885         }
             // Fold in the UniChars reported by the last step of this iteration.
 886         theUsedCharLen += localUsedCharLen;
 887     }
 888
         // Input left over with a bounded buffer and no earlier error: the buffer ran out.
 889     if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
 890         theResult = kCFStringEncodingInsufficientOutputBufferLength;
 891     }
 892     if (usedCharLen) *usedCharLen = theUsedCharLen;
 893     if (usedByteLen) *usedByteLen = usedLen;
 894
 895     return theResult;
 896 }
 897
 898 __private_extern__ Boolean CFStringEncodingIsValidEncoding(UInt32 encoding) {
         /* An encoding is valid when CFStringEncodingGetConverter finds a definition for it. */
 899     return (CFStringEncodingGetConverter(encoding) != NULL);
 900 }
 901
 902 __private_extern__ const char *CFStringEncodingName(UInt32 encoding) {
         /* Name string stored on the converter table entry for `encoding`;
          * NULL when the table has no entry for it.
          */
 903     _CFConverterEntry *found = __CFStringEncodingConverterGetEntry(encoding);
 904
 905     return (found ? found->encodingName : NULL);
 906 }
 907
 908 __private_extern__ const char **CFStringEncodingCanonicalCharsetNames(UInt32 encoding) {
         /* The ianaNames list stored on the converter table entry for `encoding`;
          * NULL when the table has no entry for it.
          */
 909     _CFConverterEntry *found = __CFStringEncodingConverterGetEntry(encoding);
 910
 911     return (found ? found->ianaNames : NULL);
 912 }
 913
 914 __private_extern__ UInt32 CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding) {
         /* Script code for `encoding`. A table entry supplies its own scriptCode.
          * Without an entry, kCFStringEncodingUnicode maps to itself, values below
          * 0xFF are returned unchanged, and anything else yields
          * kCFStringEncodingInvalidId.
          */
 915     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
 916
         if (entry) return entry->scriptCode;
         if (encoding == kCFStringEncodingUnicode) return kCFStringEncodingUnicode;
 917     return (encoding < 0xFF ? encoding : kCFStringEncodingInvalidId);
 918 }
 919
 920 __private_extern__ UInt32 CFStringEncodingCharLengthForBytes(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes) {
         /* Number of UniChars needed to hold `numBytes` bytes of `encoding`.
          * Returns 0 when no converter is available.
          *
          * converter->toUnicodeLen is either a real length procedure or a small
          * integer (the definition's maxDecomposedCharLen) stored in the pointer
          * slot. The slot is inspected as uintptr_t so that, where pointers are wider
          * than 32 bits, a genuine function address cannot be truncated into the
          * "small integer" range and mistaken for a multiplier.
          */
 921     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 922     uintptr_t switchVal;
 923
 924     if (!converter) return 0;
 925
 926     switchVal = (uintptr_t)(converter->toUnicodeLen);
 927     if (switchVal < 0xFFFF) {
             // Slot holds a per-byte multiplier rather than a procedure.
 928         return (UInt32)switchVal * numBytes;
 929     }
 930
 931     return converter->toUnicodeLen(flags, bytes, numBytes);
 932 }
 934
 935 __private_extern__ UInt32 CFStringEncodingByteLengthForCharacters(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars) {
         /* Number of bytes needed to hold `numChars` UniChars in `encoding`.
          * Returns 0 when no converter is available.
          *
          * converter->toBytesLen is either a real length procedure or a small
          * integer (the definition's maxBytesPerChar) stored in the pointer slot.
          * The slot is inspected as uintptr_t so that, where pointers are wider than
          * 32 bits, a genuine function address cannot be truncated into the
          * "small integer" range and mistaken for a multiplier.
          */
 936     const _CFEncodingConverter *converter = __CFGetConverter(encoding);
 937     uintptr_t switchVal;
 938
 939     if (!converter) return 0;
 940
 941     switchVal = (uintptr_t)(converter->toBytesLen);
 942     if (switchVal < 0xFFFF) {
             // Slot holds a per-character multiplier rather than a procedure.
 943         return (UInt32)switchVal * numChars;
 944     }
 945
 946     return converter->toBytesLen(flags, characters, numChars);
 947 }
 949
 950 __private_extern__ void CFStringEncodingRegisterFallbackProcedures(UInt32 encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
         /* Install caller-supplied fallback procedures on the converter for `encoding`.
          * A NULL argument selects the fallback recorded on the table entry instead.
          * Nothing happens when the encoding has no entry or no converter.
          */
 951     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
 952     _CFEncodingConverter *target;
 953
         // __CFGetConverter is only consulted once an entry is known to exist.
 954     if (!entry || !__CFGetConverter(encoding)) return;
 955     target = (_CFEncodingConverter *)entry->converter;
 956     target->toBytesFallback = (toBytes ? toBytes : entry->toBytesFallback);
 957     target->toUnicodeFallback = (toUnicode ? toUnicode : entry->toUnicodeFallback);
 958 }
 958
 959 __private_extern__ const CFStringEncodingConverter *CFStringEncodingGetConverter(UInt32 encoding) {
         /* Look up the table entry for `encoding`, then map it to its built-in
          * definition; NULL when either step finds nothing.
          */
 960     _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
         return __CFStringEncodingConverterGetDefinition(entry);
 961 }
 962
 963 static const UInt32 __CFBuiltinEncodings[] = {
         /* Encoding list handed out by CFStringEncodingListOfAvailableEncodings().
          * The final element, kCFStringEncodingInvalidId, marks the end of the list.
          */
 964     kCFStringEncodingMacRoman,
 965     kCFStringEncodingWindowsLatin1,
 966     kCFStringEncodingISOLatin1,
 967     kCFStringEncodingNextStepLatin,
 968     kCFStringEncodingASCII,
 969     kCFStringEncodingUTF8,
 970     /* These two are available only at the CFString level */
 971     kCFStringEncodingUnicode,
 972     kCFStringEncodingNonLossyASCII,
 973     kCFStringEncodingInvalidId,
 974 };
 975
 976
 977 __private_extern__ const UInt32 *CFStringEncodingListOfAvailableEncodings(void) {
         /* Pointer to the first element of the static __CFBuiltinEncodings table. */
 978     return &__CFBuiltinEncodings[0];
 979 }
 980