String.subproj/CFStringEncodings.c

   1 /*
   2  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /*      CFStringEncodings.c
  26         Copyright 1999-2002, Apple, Inc. All rights reserved.
  27         Responsibility: Aki Inoue
  28 */
  29
  30 #include "CFInternal.h"
  31 #include <CoreFoundation/CFString.h>
  32 #include <CoreFoundation/CFByteOrder.h>
  33 #include "CFUtilities.h"
  34 #include <string.h>
  35 #include "CFStringEncodingConverterExt.h"
  36 #include "CFUniChar.h"
  37 #include "CFUnicodeDecomposition.h"
  38
  39 static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
  40 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
  41     if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
  42         __CFWantsToUseASCIICompatibleConversion = false;
  43     }
  44     return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
  45 }
  46
  47 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
  48     __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
  49 }
  50
  51 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
  52
  53 // To avoid early initialization issues, we just initialize this here
  54 // This should not be const as it is changed
  55 UniChar __CFCharToUniCharTable[256] = {
  56   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  57  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  58  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
  59  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
  60  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
  61  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
  62  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
  63 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
  64 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  65 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  66 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  67 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  68 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
  69 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
  70 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
  71 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  72 };
  73
  74 void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
  75     if (__CFCharToUniCharFunc != func) {
  76         int ch;
  77         __CFCharToUniCharFunc = func;
  78         if (func) {
  79             for (ch = 128; ch < 256; ch++) {
  80                 UniChar uch;
  81                 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
  82             }
  83         } else {        // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
  84             for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
  85         }
  86     }
  87 }
  88
  89 __private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
  90     CFIndex idx;
  91     for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
  92 }
  93
  94
  95 /* The minimum length the output buffers should be in the above functions
  96 */
  97 #define kCFCharConversionBufferLength 512
  98
  99
 100 #define MAX_LOCAL_CHARS         (sizeof(buffer->localBuffer) / sizeof(uint8_t))
 101 #define MAX_LOCAL_UNICHARS      (sizeof(buffer->localBuffer) / sizeof(UniChar))
 102
 103 #if defined(__BIG_ENDIAN__)
 104 #define SHOULD_SWAP(BOM) (BOM == 0xFFFE)
 105 #else
 106 #define SHOULD_SWAP(BOM) (BOM != 0xFEFF)
 107 #endif
 108
 109 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
 110 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
 111 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
 112 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
 113 !!! converterFlags is only used for the UTF8 converter at this point
 114 */
 115 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
 116     return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
 117 }
 118
 119 enum {
 120     __NSNonLossyErrorMode = -1,
 121     __NSNonLossyASCIIMode = 0,
 122     __NSNonLossyBackslashMode = 1,
 123     __NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,
 124     __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,
 125     __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,
 126     __NSNonLossyOctalFinalMode = __NSNonLossyHexFinalMode + 3
 127 };
 128
 129 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
 130     UInt32 idx;
 131     const UniChar *uniChars = (const UniChar *)bytes;
 132     const uint8_t *chars = (const uint8_t *)bytes;
 133     const uint8_t *end = chars + len;
 134     uint16_t bom;
 135     Boolean allASCII = false;
 136
 137     if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
 138
 139     buffer->isASCII = !alwaysUnicode;
 140     buffer->shouldFreeChars = false;
 141     buffer->numChars = 0;
 142     if (0 == len) return true;
 143
 144     buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
 145     switch (encoding) {
 146     case kCFStringEncodingUnicode:
 147         bom = (*uniChars == 0xfffe || *uniChars == 0xfeff) ? (*uniChars++) : 0;
 148         /* If the byte order mark is missing, we assume big endian... */
 149         len = len / 2 - (0 == bom ? 0 : 1);
 150
 151         if (buffer->isASCII) {  // Let's see if we can reduce the Unicode down to ASCII...
 152             if (SHOULD_SWAP(bom)) {
 153                 for (idx = 0; idx < len; idx++) if ((uniChars[idx] & 0x80ff) != 0) {buffer->isASCII = false; break;}
 154             } else {
 155                 for (idx = 0; idx < len; idx++) if (uniChars[idx] > 127) {buffer->isASCII = false; break;}
 156             }
 157         }
 158
 159         if (buffer->isASCII) {
 160             buffer->numChars = len;
 161             buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 162             buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 163             if (SHOULD_SWAP(bom)) {     // !!! Can be somewhat trickier here and use a single loop with a properly inited ptr
 164                 for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = (uniChars[idx] >> 8);
 165             } else {
 166                 for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = uniChars[idx];
 167             }
 168         } else {
 169             buffer->numChars = len;
 170             if (useClientsMemoryPtr && (bom == 0) && !SHOULD_SWAP(bom)) {       // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
 171                 *useClientsMemoryPtr = true;
 172                 buffer->shouldFreeChars = false;
 173                 buffer->chars.unicode = (UniChar *)bytes;
 174             } else {
 175                 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 176                 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 177             if (SHOULD_SWAP(bom)) {
 178                     for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = CFSwapInt16(uniChars[idx]);
 179                 } else {
 180                     memmove(buffer->chars.unicode, uniChars, len * sizeof(UniChar));
 181                 }
 182             }
 183         }
 184         return true;
 185
 186     case kCFStringEncodingNonLossyASCII: {
 187         UTF16Char currentValue = 0;
 188         uint8_t character;
 189         int8_t mode = __NSNonLossyASCIIMode;
 190
 191         buffer->isASCII = false;
 192         buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 193         buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 194         buffer->numChars = 0;
 195
 196         while (chars < end) {
 197             character = (*chars++);
 198
 199             switch (mode) {
 200             case __NSNonLossyASCIIMode:
 201                 if (character == '\\') {
 202                     mode = __NSNonLossyBackslashMode;
 203                 } else if (character < 0x80) {
 204                     currentValue = character;
 205                 } else {
 206                     mode = __NSNonLossyErrorMode;
 207                 }
 208                 break;
 209
 210             case __NSNonLossyBackslashMode:
 211                 if ((character == 'U') || (character == 'u')) {
 212                     mode = __NSNonLossyHexInitialMode;
 213                     currentValue = 0;
 214                 } else if ((character >= '0') && (character <= '9')) {
 215                     mode = __NSNonLossyOctalInitialMode;
 216                     currentValue = character - '0';
 217                 } else if (character == '\\') {
 218                     mode = __NSNonLossyASCIIMode;
 219                     currentValue = character;
 220                 } else {
 221                     mode = __NSNonLossyErrorMode;
 222                 }
 223                 break;
 224
 225             default:
 226                 if (mode < __NSNonLossyHexFinalMode) {
 227                     if ((character >= '0') && (character <= '9')) {
 228                         currentValue = (currentValue << 4) | (character - '0');
 229                         if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 230                     } else {
 231                         if (character >= 'a') character -= ('a' - 'A');
 232                         if ((character >= 'A') && (character <= 'F')) {
 233                             currentValue = (currentValue << 4) | ((character - 'A') + 10);
 234                             if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 235                         } else {
 236                             mode = __NSNonLossyErrorMode;
 237                         }
 238                     }
 239                 } else {
 240                     if ((character >= '0') && (character <= '9')) {
 241                         currentValue = (currentValue << 3) | (character - '0');
 242                         if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
 243                     } else {
 244                         mode = __NSNonLossyErrorMode;
 245                     }
 246                 }
 247                 break;
 248             }
 249
 250             if (mode == __NSNonLossyASCIIMode) {
 251                 buffer->chars.unicode[buffer->numChars++] = currentValue;
 252             } else if (mode == __NSNonLossyErrorMode) {
 253                 return false;
 254             }
 255         }
 256         return (mode == __NSNonLossyASCIIMode);
 257     }
 258
 259     case kCFStringEncodingUTF8:
 260         if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) {     // If UTF8 BOM, skip
 261             chars += 3;
 262             len -= 3;
 263             if (0 == len) return true;
 264         }
 265         allASCII = !alwaysUnicode;
 266         if (allASCII) {
 267             for (idx = 0; idx < len; idx++) {
 268                 if (128 <= chars[idx]) {
 269                     allASCII = false;
 270                     break;
 271                 }
 272             }
 273         }
 274         buffer->isASCII = allASCII;
 275         if (allASCII) {
 276             buffer->numChars = len;
 277             buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 278             buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 279             memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 280         } else {
 281             UInt32 numDone;
 282             static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
 283
 284             if (!__CFFromUTF8) {
 285                 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 286                 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
 287             }
 288
 289             buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 290             buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 291             buffer->numChars = 0;
 292             while (chars < end) {
 293                 numDone = 0;
 294                 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
 295
 296                 if (0 == numDone) {
 297                     if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
 298                     buffer->isASCII = !alwaysUnicode;
 299                     buffer->shouldFreeChars = false;
 300                     buffer->chars.ascii = NULL;
 301                     buffer->numChars = 0;
 302                     return false;
 303                 }
 304                 buffer->numChars += numDone;
 305             }
 306         }
 307         return true;
 308
 309     default:
 310         if (CFStringEncodingIsValidEncoding(encoding)) {
 311             const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
 312             Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
 313
 314             if (!converter) return false;
 315
 316             if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
 317                 allASCII = !alwaysUnicode && isASCIISuperset;
 318                     if (allASCII) {
 319                         for (idx = 0; idx < len; idx++) {
 320                             if (128 <= chars[idx]) {
 321                                 allASCII = false;
 322                                 break;
 323                             }
 324                         }
 325                     }
 326                     buffer->isASCII = allASCII;
 327                     if (allASCII) {
 328                         buffer->numChars = len;
 329                         buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 330                         buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 331                         memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 332                     } else {
 333                         buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 334                         buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 335                         buffer->numChars = len;
 336                         if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
 337                             for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
 338                         } else {
 339                             for (idx = 0; idx < len; idx++)
 340                                 if (chars[idx] < 0x80 && isASCIISuperset)
 341                                     buffer->chars.unicode[idx] = (UniChar)chars[idx];
 342                                 else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx))
 343                                     return false;
 344                         }
 345                     }
 346                     return true;
 347             } else {
 348                 allASCII = !alwaysUnicode && isASCIISuperset;
 349                 if (allASCII) {
 350                     for (idx = 0; idx < len; idx++)
 351                         if (128 <= chars[idx]) {
 352                             allASCII = false;
 353                             break;
 354                         }
 355                 }
 356                 buffer->isASCII = allASCII;
 357                 if (allASCII) {
 358                     buffer->numChars = len;
 359                     buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 360                     buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 361                     memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 362                 } else {
 363                     UInt32 guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
 364                     static UInt32 lossyFlag = (UInt32)-1;
 365
 366                     buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
 367                     buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
 368
 369                     if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
 370
 371                     if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) {
 372                         if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
 373                         buffer->isASCII = !alwaysUnicode;
 374                         buffer->shouldFreeChars = false;
 375                         buffer->chars.ascii = NULL;
 376                         buffer->numChars = 0;
 377                         return false;
 378                     }
 379                 }
 380                 return true;
 381             }
 382         } else {
 383             return false;
 384         }
 385     }
 386 }
 387
 388
 389 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
 390    into a fixed size buffer. Returns number of characters converted.
 391    Characters that cannot be converted to the specified encoding are represented
 392    with the char specified by lossByte; if 0, then lossy conversion is not allowed
 393    and conversion stops, returning partial results.
 394    Pass buffer==NULL if you don't care about the converted string (but just the convertability,
 395    or number of bytes required, indicated by usedBufLen).
 396    Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
 397
 398    Note: This function is intended to work through CFString functions, so it should work
 399    with NSStrings as well as CFStrings.
 400 */
 401 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
 402     CFIndex totalBytesWritten = 0;      /* Number of written bytes */
 403     CFIndex numCharsProcessed = 0;      /* Number of processed chars */
 404     const UniChar *unichars;
 405
 406     if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
 407         static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
 408
 409         if (!__CFToUTF8) {
 410             const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 411             __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
 412         }
 413         numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
 414
 415     } else if (encoding == kCFStringEncodingNonLossyASCII) {
 416         const char *hex = "0123456789abcdef";
 417         UniChar ch;
 418         CFStringInlineBuffer buf;
 419         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
 420         while (numCharsProcessed < rangeLen) {
 421             CFIndex reqLength; /* Required number of chars to encode this UniChar */
 422             CFIndex cnt;
 423             char tmp[6];
 424             ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
 425             if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
 426                 reqLength = 1;
 427                 tmp[0] = ch;
 428             } else {
 429                 if (ch == '\\') {
 430                     tmp[1] = '\\';
 431                     reqLength = 2;
 432                 } else if (ch < 256) {  /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
 433                     tmp[1] = '0' + (ch >> 6);
 434                     tmp[2] = '0' + ((ch >> 3) & 7);
 435                     tmp[3] = '0' + (ch & 7);
 436                     reqLength = 4;
 437                 } else {        /* \Unnnn */
 438                     tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
 439                     tmp[2] = hex[(ch >> 12) & 0x0f];
 440                     tmp[3] = hex[(ch >> 8) & 0x0f];
 441                     tmp[4] = hex[(ch >> 4) & 0x0f];
 442                     tmp[5] = hex[ch & 0x0f];
 443                     reqLength = 6;
 444                 }
 445                 tmp[0] = '\\';
 446             }
 447             if (buffer) {
 448                 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
 449 .*/
 450                 for (cnt = 0; cnt < reqLength; cnt++) {
 451                     buffer[totalBytesWritten + cnt] = tmp[cnt];
 452                 }
 453             }
 454             totalBytesWritten += reqLength;
 455             numCharsProcessed++;
 456         }
 457     } else if (encoding == kCFStringEncodingUnicode) {
 458         CFIndex extraForBOM = generatingExternalFile ? sizeof(UniChar) : 0;
 459         numCharsProcessed = rangeLen;
 460         if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
 461             numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
 462         }
 463         totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
 464         if (buffer) {
 465             if (generatingExternalFile) {       /* Generate BOM */
 466 #if defined(__BIG_ENDIAN__)
 467                 *buffer++ = 0xfe; *buffer++ = 0xff;
 468 #else
 469                 *buffer++ = 0xff; *buffer++ = 0xfe;
 470 #endif
 471             }
 472             CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
 473         }
 474     } else {
 475         CFIndex numChars;
 476         UInt32 flags;
 477         const unsigned char *cString = NULL;
 478
 479         if (!CF_IS_OBJC(CFStringGetTypeID(), string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // Checking for NSString to avoid infinite recursion
 480             const unsigned char *ptr;
 481             if ((cString = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 482                 ptr = (cString += rangeLoc);
 483                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 484                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 485                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 486                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 487                     return numCharsProcessed;
 488                 }
 489                 while (*ptr < 0x80 && rangeLen > 0) {
 490                     ++ptr;
 491                     --rangeLen;
 492                 }
 493                 numCharsProcessed = ptr - cString;
 494                 if (buffer) {
 495                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 496                     memmove(buffer, cString, numCharsProcessed);
 497                     buffer += numCharsProcessed;
 498                     max -= numCharsProcessed;
 499                 }
 500                 if (!rangeLen || (buffer && (max == 0))) {
 501                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 502                     return numCharsProcessed;
 503                 }
 504                 rangeLoc += numCharsProcessed;
 505                 totalBytesWritten += numCharsProcessed;
 506             }
 507             if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 508                 ptr = (cString += (rangeLoc + 1));
 509                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 510                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 511                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 512                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 513                     return numCharsProcessed;
 514                 }
 515                 while (*ptr < 0x80 && rangeLen > 0) {
 516                     ++ptr;
 517                     --rangeLen;
 518                 }
 519                 numCharsProcessed = ptr - cString;
 520                 if (buffer) {
 521                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 522                     memmove(buffer, cString, numCharsProcessed);
 523                     buffer += numCharsProcessed;
 524                     max -= numCharsProcessed;
 525                 }
 526                 if (!rangeLen || (buffer && (max == 0))) {
 527                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 528                     return numCharsProcessed;
 529                 }
 530                 rangeLoc += numCharsProcessed;
 531                 totalBytesWritten += numCharsProcessed;
 532             }
 533         }
 534
 535         if (!buffer) max = 0;
 536
 537         // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
 538         flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | __CFGetASCIICompatibleFlag();
 539
 540         if (!cString && (cString = (const char*)CFStringGetCharactersPtr(string))) { // Must be Unicode string
 541             if (CFStringEncodingIsValidEncoding(encoding)) { // Converter available in CF
 542                 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar*)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
 543             } else {
 544                 return 0;
 545             }
 546         } else {
 547             UniChar charBuf[kCFCharConversionBufferLength];
 548             UInt32 currentLength;
 549             UInt32 usedLen;
 550             uint32_t lastUsedLen = 0, lastNumChars = 0;
 551             uint32_t result;
 552             Boolean isCFBuiltin = CFStringEncodingIsValidEncoding(encoding);
 553 #define MAX_DECOMP_LEN (6)
 554
 555             while (rangeLen > 0) {
 556                 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
 557                 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
 558
 559                 // could be in the middle of surrogate pair; back up.
 560                 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
 561
 562                 if (isCFBuiltin) { // Converter available in CF
 563                     if ((result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, currentLength, &numChars, buffer, max, &usedLen)) != kCFStringEncodingConversionSuccess) {
 564                         if (kCFStringEncodingInvalidInputStream == result) {
 565                             CFRange composedRange;
 566                             // Check the tail
 567                             if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
 568                                 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
 569
 570                                 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
 571                                     result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
 572                                 }
 573                             }
 574
 575                             // Check the head
 576                             if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
 577                                 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
 578
 579                                 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
 580                                     // Try if the composed range can be converted
 581                                     CFStringGetCharacters(string, composedRange, charBuf);
 582
 583                                     if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 584                                         CFIndex lastRangeLoc = rangeLoc - lastNumChars;
 585
 586                                         currentLength = composedRange.location - lastRangeLoc;
 587                                         CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
 588
 589                                         if ((result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen)) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 590                                             // Looks good. back up
 591                                             totalBytesWritten -= lastUsedLen;
 592                                             numCharsProcessed -= lastNumChars;
 593
 594                                             rangeLoc = lastRangeLoc;
 595                                             rangeLen += lastNumChars;
 596
 597                                             if (max) {
 598                                                 buffer -= lastUsedLen;
 599                                                 max += lastUsedLen;
 600                                             }
 601                                         }
 602                                     }
 603                                 }
 604                             }
 605                         }
 606
 607                         if (kCFStringEncodingConversionSuccess != result) { // really failed
 608                             totalBytesWritten += usedLen;
 609                             numCharsProcessed += numChars;
 610                             break;
 611                         }
 612                     }
 613                 } else {
 614                     return 0;
 615                 }
 616
 617                 totalBytesWritten += usedLen;
 618                 numCharsProcessed += numChars;
 619
 620                 rangeLoc += numChars;
 621                 rangeLen -= numChars;
 622                 if (max) {
 623                     buffer += usedLen;
 624                     max -= usedLen;
 625                     if (max <= 0) break;
 626                 }
 627                 lastUsedLen = usedLen; lastNumChars = numChars;
 628                 flags &= ~kCFStringEncodingPrependBOM;
 629             }
 630         }
 631     }
 632     if (usedBufLen) *usedBufLen = totalBytesWritten;
 633     return numCharsProcessed;
 634 }
 635
 636 #define MAX_STACK_BUFFER_LEN    (255)
 637 CF_EXPORT Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
 638 #if defined(__MACH__)
 639     const UTF16Char *characters = CFStringGetCharactersPtr(string);
 640     uint32_t usedBufLen;
 641
 642     if (NULL == characters) {
 643         CFIndex length = CFStringGetLength(string);
 644
 645         if (length > MAX_STACK_BUFFER_LEN) {
 646             UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
 647             CFRange range = CFRangeMake(0, MAX_STACK_BUFFER_LEN);
 648             uint32_t localUsedBufLen;
 649
 650             usedBufLen = 0;
 651
 652             while (length > 0) {
 653                 CFStringGetCharacters(string, range, charactersBuffer);
 654                 if (CFUniCharIsSurrogateHighCharacter(charactersBuffer[range.length - 1])) --range.length; // Backup for a high surrogate
 655
 656                 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, maxBufLen - usedBufLen, &localUsedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 657                 buffer += localUsedBufLen;
 658                 usedBufLen += localUsedBufLen;
 659
 660                 length -= range.length;
 661                 range.location += range.length;
 662                 range.length = (length < MAX_STACK_BUFFER_LEN ? length : MAX_STACK_BUFFER_LEN);
 663             }
 664         } else {
 665             UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN]; // C99 Variable array
 666
 667             CFStringGetCharacters(string, CFRangeMake(0, length), charactersBuffer);
 668             if (!CFUniCharDecompose(charactersBuffer, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 669             buffer += usedBufLen;
 670         }
 671     } else {
 672         if (!CFUniCharDecompose(characters, CFStringGetLength(string), NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 673         buffer += usedBufLen;
 674     }
 675
 676     if (usedBufLen < (uint32_t)maxBufLen) { // Since the filename has its own limit, this is ok for now
 677         *buffer = '\0';
 678         return true;
 679     } else {
 680         return false;
 681     }
 682 #else __MACH__
 683     return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
 684 #endif __MACH__
 685 }