CFStringEncodings.c

   1 /*
   2  * Copyright (c) 2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 /*      CFStringEncodings.c
  25         Copyright (c) 1999-2011, Apple Inc. All rights reserved.
  26         Responsibility: Aki Inoue
  27 */
  28
  29 #include "CFInternal.h"
  30 #include <CoreFoundation/CFString.h>
  31 #include <CoreFoundation/CFByteOrder.h>
  32 #include <CoreFoundation/CFPriv.h>
  33 #include <string.h>
  34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
  35 #include <CoreFoundation/CFUniChar.h>
  36 #include <CoreFoundation/CFUnicodeDecomposition.h>
  37 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
  38 #include <stdlib.h>
  39 #include <fcntl.h>
  40 #include <pwd.h>
  41 #include <sys/param.h>
  42 #include <unistd.h>
  43 #include <string.h>
  44 #include <stdio.h>
  45 #include <xlocale.h>
  46 #include <CoreFoundation/CFStringDefaultEncoding.h>
  47 #endif
  48
  49 static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
  50 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
  51     if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
  52         __CFWantsToUseASCIICompatibleConversion = false;
  53     }
  54     return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
  55 }
  56
  57 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
  58     __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
  59 }
  60
  61 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
  62
  63 // To avoid early initialization issues, we just initialize this here
  64 // This should not be const as it is changed
  65 __private_extern__ UniChar __CFCharToUniCharTable[256] = {
  66   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  67  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  68  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
  69  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
  70  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
  71  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
  72  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
  73 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
  74 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  75 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  76 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  77 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  78 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
  79 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
  80 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
  81 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  82 };
  83
  84 __private_extern__ void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
  85     if (__CFCharToUniCharFunc != func) {
  86         int ch;
  87         __CFCharToUniCharFunc = func;
  88         if (func) {
  89             for (ch = 128; ch < 256; ch++) {
  90                 UniChar uch;
  91                 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
  92             }
  93         } else {        // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
  94             for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
  95         }
  96     }
  97 }
  98
  99 __private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
 100     CFIndex idx;
 101     for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
 102 }
 103
 104
 105 /* The minimum length the output buffers should be in the above functions
 106 */
 107 #define kCFCharConversionBufferLength 512
 108
 109
 110 #define MAX_LOCAL_CHARS         (sizeof(buffer->localBuffer) / sizeof(uint8_t))
 111 #define MAX_LOCAL_UNICHARS      (sizeof(buffer->localBuffer) / sizeof(UniChar))
 112
/* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. A false return indicates that an error occurred during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
9/18/98 __CFStringDecodeByteStream now avoids allocating a buffer if buffer->chars is not NULL
Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
__CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
!!! converterFlags is only used for the UTF8 converter at this point
*/
 119 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
 120     return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
 121 }
 122
 123 enum {
 124     __NSNonLossyErrorMode = -1,
 125     __NSNonLossyASCIIMode = 0,
 126     __NSNonLossyBackslashMode = 1,
 127     __NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,
 128     __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,
 129     __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,
 130     __NSNonLossyOctalFinalMode = __NSNonLossyHexFinalMode + 3
 131 };
 132
 133 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
 134     CFIndex idx;
 135     const uint8_t *chars = (const uint8_t *)bytes;
 136     const uint8_t *end = chars + len;
 137     Boolean result = TRUE;
 138
 139     if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
 140
 141     buffer->isASCII = !alwaysUnicode;
 142     buffer->shouldFreeChars = false;
 143     buffer->numChars = 0;
 144
 145     if (0 == len) return true;
 146
 147     buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
 148
 149     if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
 150         const UTF16Char *src = (const UTF16Char *)bytes;
 151         const UTF16Char *limit = src + (len / sizeof(UTF16Char)); // <rdar://problem/7854378> avoiding odd len issue
 152         bool swap = false;
 153
 154         if (kCFStringEncodingUTF16 == encoding) {
 155             UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
 156
 157 #if __CF_BIG_ENDIAN__
 158             if (bom == 0xFFFE) swap = true;
 159 #else
 160             if (bom != 0xFEFF) swap = true;
 161 #endif
 162             if (bom) useClientsMemoryPtr = NULL;
 163         } else {
 164 #if __CF_BIG_ENDIAN__
 165             if (kCFStringEncodingUTF16LE == encoding) swap = true;
 166 #else
 167             if (kCFStringEncodingUTF16BE == encoding) swap = true;
 168 #endif
 169         }
 170
 171         buffer->numChars = limit - src;
 172
 173         if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
 174             *useClientsMemoryPtr = true;
 175             buffer->chars.unicode = (UniChar *)src;
 176             buffer->isASCII = false;
 177         } else {
 178             if (buffer->isASCII) {      // Let's see if we can reduce the Unicode down to ASCII...
 179                 const UTF16Char *characters = src;
 180                 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
 181
 182                 while (characters < limit) {
 183                     if (*(characters++) & mask) {
 184                         buffer->isASCII = false;
 185                         break;
 186                     }
 187                 }
 188             }
 189
 190             if (buffer->isASCII) {
 191                 uint8_t *dst;
 192                 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
 193                     if (buffer->numChars > MAX_LOCAL_CHARS) {
 194                         buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
 195                         if (!buffer->chars.ascii) goto memoryErrorExit;
 196                         buffer->shouldFreeChars = true;
 197                     } else {
 198                         buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
 199                     }
 200                 }
 201                 dst = buffer->chars.ascii;
 202
 203                 if (swap) {
 204                     while (src < limit) *(dst++) = (*(src++) >> 8);
 205                 } else {
 206                     while (src < limit) *(dst++) = (uint8_t)*(src++);
 207                 }
 208             } else {
 209                 UTF16Char *dst;
 210
 211                 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
 212                     if (buffer->numChars > MAX_LOCAL_UNICHARS) {
 213                         buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
 214                         if (!buffer->chars.unicode) goto memoryErrorExit;
 215                         buffer->shouldFreeChars = true;
 216                     } else {
 217                         buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
 218                     }
 219                 }
 220                 dst = buffer->chars.unicode;
 221
 222                 if (swap) {
 223                     while (src < limit) *(dst++) = CFSwapInt16(*(src++));
 224                 } else {
 225                     memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
 226                 }
 227             }
 228         }
 229     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
 230         const UTF32Char *src = (const UTF32Char *)bytes;
 231         const UTF32Char *limit =  src + (len / sizeof(UTF32Char)); // <rdar://problem/7854378> avoiding odd len issue
 232         bool swap = false;
 233         static bool strictUTF32 = (bool)-1;
 234
 235         if ((bool)-1 == strictUTF32) strictUTF32 = (_CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) != 0);
 236
 237         if (kCFStringEncodingUTF32 == encoding) {
 238             UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
 239
 240 #if __CF_BIG_ENDIAN__
 241             if (bom == 0xFFFE0000) swap = true;
 242 #else
 243             if (bom != 0x0000FEFF) swap = true;
 244 #endif
 245         } else {
 246 #if __CF_BIG_ENDIAN__
 247             if (kCFStringEncodingUTF32LE == encoding) swap = true;
 248 #else
 249             if (kCFStringEncodingUTF32BE == encoding) swap = true;
 250 #endif
 251         }
 252
 253         buffer->numChars = limit - src;
 254
 255         {
 256             // Let's see if we have non-ASCII or non-BMP
 257             const UTF32Char *characters = src;
 258             UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
 259             UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
 260
 261             while (characters < limit) {
 262                 if (*characters & asciiMask) {
 263                     buffer->isASCII = false;
 264                     if (*characters & bmpMask) {
 265                         if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
 266                         ++(buffer->numChars);
 267                     }
 268                 }
 269                 ++characters;
 270             }
 271         }
 272
 273         if (buffer->isASCII) {
 274             uint8_t *dst;
 275             if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
 276                 if (buffer->numChars > MAX_LOCAL_CHARS) {
 277                     buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
 278                     if (!buffer->chars.ascii) goto memoryErrorExit;
 279                     buffer->shouldFreeChars = true;
 280                 } else {
 281                     buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
 282                 }
 283             }
 284             dst = buffer->chars.ascii;
 285
 286             if (swap) {
 287                 while (src < limit) *(dst++) = (*(src++) >> 24);
 288             } else {
 289                 while (src < limit) *(dst++) = *(src++);
 290             }
 291         } else {
 292             if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
 293                 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
 294                     buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
 295                     if (!buffer->chars.unicode) goto memoryErrorExit;
 296                     buffer->shouldFreeChars = true;
 297                 } else {
 298                     buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
 299                 }
 300             }
 301             result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
 302         }
 303     } else if (kCFStringEncodingUTF8 == encoding) {
 304         if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) {     // If UTF8 BOM, skip
 305             chars += 3;
 306             len -= 3;
 307             if (0 == len) return true;
 308         }
 309         if (buffer->isASCII) {
 310             for (idx = 0; idx < len; idx++) {
 311                 if (128 <= chars[idx]) {
 312                     buffer->isASCII = false;
 313                     break;
 314                 }
 315             }
 316         }
 317         if (buffer->isASCII) {
 318             buffer->numChars = len;
 319             buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 320             buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 321             if (!buffer->chars.ascii) goto memoryErrorExit;
 322             memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 323         } else {
 324             CFIndex numDone;
 325             static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
 326
 327             if (!__CFFromUTF8) {
 328                 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 329                 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
 330             }
 331
 332             buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 333             buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 334             if (!buffer->chars.unicode) goto memoryErrorExit;
 335             buffer->numChars = 0;
 336             while (chars < end) {
 337                 numDone = 0;
 338                 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
 339
 340                 if (0 == numDone) {
 341                     result = FALSE;
 342                     break;
 343                 }
 344                 buffer->numChars += numDone;
 345             }
 346         }
 347     } else if (kCFStringEncodingNonLossyASCII == encoding) {
 348         UTF16Char currentValue = 0;
 349         uint8_t character;
 350         int8_t mode = __NSNonLossyASCIIMode;
 351
 352         buffer->isASCII = false;
 353         buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 354         buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 355         if (!buffer->chars.unicode) goto memoryErrorExit;
 356         buffer->numChars = 0;
 357
 358         while (chars < end) {
 359             character = (*chars++);
 360
 361             switch (mode) {
 362                 case __NSNonLossyASCIIMode:
 363                     if (character == '\\') {
 364                         mode = __NSNonLossyBackslashMode;
 365                     } else if (character < 0x80) {
 366                         currentValue = character;
 367                     } else {
 368                         mode = __NSNonLossyErrorMode;
 369                     }
 370                     break;
 371
 372                     case __NSNonLossyBackslashMode:
 373                     if ((character == 'U') || (character == 'u')) {
 374                         mode = __NSNonLossyHexInitialMode;
 375                         currentValue = 0;
 376                     } else if ((character >= '0') && (character <= '9')) {
 377                         mode = __NSNonLossyOctalInitialMode;
 378                         currentValue = character - '0';
 379                     } else if (character == '\\') {
 380                         mode = __NSNonLossyASCIIMode;
 381                         currentValue = character;
 382                     } else {
 383                         mode = __NSNonLossyErrorMode;
 384                     }
 385                     break;
 386
 387                     default:
 388                     if (mode < __NSNonLossyHexFinalMode) {
 389                         if ((character >= '0') && (character <= '9')) {
 390                             currentValue = (currentValue << 4) | (character - '0');
 391                             if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 392                         } else {
 393                             if (character >= 'a') character -= ('a' - 'A');
 394                             if ((character >= 'A') && (character <= 'F')) {
 395                                 currentValue = (currentValue << 4) | ((character - 'A') + 10);
 396                                 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 397                             } else {
 398                                 mode = __NSNonLossyErrorMode;
 399                             }
 400                         }
 401                     } else {
 402                         if ((character >= '0') && (character <= '9')) {
 403                             currentValue = (currentValue << 3) | (character - '0');
 404                             if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
 405                         } else {
 406                             mode = __NSNonLossyErrorMode;
 407                         }
 408                     }
 409                     break;
 410             }
 411
 412             if (mode == __NSNonLossyASCIIMode) {
 413                 buffer->chars.unicode[buffer->numChars++] = currentValue;
 414             } else if (mode == __NSNonLossyErrorMode) {
 415                 break;
 416             }
 417         }
 418         result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
 419     } else {
 420         const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
 421
 422         if (!converter) return false;
 423
 424         Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
 425
 426         if (!isASCIISuperset) buffer->isASCII = false;
 427
 428         if (buffer->isASCII) {
 429             for (idx = 0; idx < len; idx++) {
 430                 if (128 <= chars[idx]) {
 431                     buffer->isASCII = false;
 432                     break;
 433                 }
 434             }
 435         }
 436
 437         if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
 438             if (buffer->isASCII) {
 439                 buffer->numChars = len;
 440                 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 441                 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 442                 if (!buffer->chars.ascii) goto memoryErrorExit;
 443                 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 444             } else {
 445                 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 446                 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 447                 if (!buffer->chars.unicode) goto memoryErrorExit;
 448                 buffer->numChars = len;
 449                 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
 450                     for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
 451                 } else {
 452                     for (idx = 0; idx < len; idx++) {
 453                         if (chars[idx] < 0x80 && isASCIISuperset) {
 454                             buffer->chars.unicode[idx] = (UniChar)chars[idx];
 455                         } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
 456                             result = FALSE;
 457                             break;
 458                         }
 459                     }
 460                 }
 461             }
 462         } else {
 463             if (buffer->isASCII) {
 464                 buffer->numChars = len;
 465                 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 466                 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 467                 if (!buffer->chars.ascii) goto memoryErrorExit;
 468                 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 469             } else {
 470                 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
 471                 static UInt32 lossyFlag = (UInt32)-1;
 472
 473                 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
 474                 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
 475                 if (!buffer->chars.unicode) goto memoryErrorExit;
 476
 477                 if (lossyFlag == (UInt32)-1) lossyFlag = 0;
 478
 479                 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
 480             }
 481         }
 482     }
 483
 484     if (FALSE == result) {
 485 memoryErrorExit:        // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
 486         result = FALSE; // In case we come here from a goto
 487         if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
 488         buffer->isASCII = !alwaysUnicode;
 489         buffer->shouldFreeChars = false;
 490         buffer->chars.ascii = NULL;
 491         buffer->numChars = 0;
 492     }
 493     return result;
 494 }
 495
 496
/* Create a byte stream from a CFString backing. Can convert a string piece at a time
   into a fixed size buffer. Returns number of characters converted.
   Characters that cannot be converted to the specified encoding are represented
   with the char specified by lossByte; if 0, then lossy conversion is not allowed
   and conversion stops, returning partial results.
   Pass buffer==NULL if you don't care about the converted string (but just the convertibility,
   or number of bytes required, indicated by usedBufLen).
   Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.

   Note: This function is intended to work through CFString functions, so it should work
   with NSStrings as well as CFStrings.
*/
 509 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
 510     CFIndex totalBytesWritten = 0;      /* Number of written bytes */
 511     CFIndex numCharsProcessed = 0;      /* Number of processed chars */
 512     const UniChar *unichars;
 513
 514     if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
 515         static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
 516
 517         if (!__CFToUTF8) {
 518             const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 519             __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
 520         }
 521         numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
 522
 523     } else if (encoding == kCFStringEncodingNonLossyASCII) {
 524         const char *hex = "0123456789abcdef";
 525         UniChar ch;
 526         CFStringInlineBuffer buf;
 527         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
 528         while (numCharsProcessed < rangeLen) {
 529             CFIndex reqLength; /* Required number of chars to encode this UniChar */
 530             CFIndex cnt;
 531             char tmp[6];
 532             ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
 533             if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
 534                 reqLength = 1;
 535                 tmp[0] = (char)ch;
 536             } else {
 537                 if (ch == '\\') {
 538                     tmp[1] = '\\';
 539                     reqLength = 2;
 540                 } else if (ch < 256) {  /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
 541                     tmp[1] = '0' + (ch >> 6);
 542                     tmp[2] = '0' + ((ch >> 3) & 7);
 543                     tmp[3] = '0' + (ch & 7);
 544                     reqLength = 4;
 545                 } else {        /* \Unnnn */
 546                     tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
 547                     tmp[2] = hex[(ch >> 12) & 0x0f];
 548                     tmp[3] = hex[(ch >> 8) & 0x0f];
 549                     tmp[4] = hex[(ch >> 4) & 0x0f];
 550                     tmp[5] = hex[ch & 0x0f];
 551                     reqLength = 6;
 552                 }
 553                 tmp[0] = '\\';
 554             }
 555             if (buffer) {
 556                 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
 557 .*/
 558                 for (cnt = 0; cnt < reqLength; cnt++) {
 559                     buffer[totalBytesWritten + cnt] = tmp[cnt];
 560                 }
 561             }
 562             totalBytesWritten += reqLength;
 563             numCharsProcessed++;
 564         }
 565     } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
 566         CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
 567         numCharsProcessed = rangeLen;
 568         if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
 569             numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
 570         }
 571         totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
 572         if (buffer) {
 573             if (extraForBOM) {  /* Generate BOM */
 574 #if __CF_BIG_ENDIAN__
 575                 *buffer++ = 0xfe; *buffer++ = 0xff;
 576 #else
 577                 *buffer++ = 0xff; *buffer++ = 0xfe;
 578 #endif
 579             }
 580             CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
 581             if ((__CF_BIG_ENDIAN__ ?  kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
 582                 UTF16Char *characters = (UTF16Char *)buffer;
 583                 const UTF16Char *limit = characters + numCharsProcessed;
 584
 585                 while (characters < limit) {
 586                     *characters = CFSwapInt16(*characters);
 587                     ++characters;
 588                 }
 589             }
 590         }
 591     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
 592         UTF32Char character;
 593         CFStringInlineBuffer buf;
 594         UTF32Char *characters = (UTF32Char *)buffer;
 595
 596         bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
 597         if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
 598             totalBytesWritten += sizeof(UTF32Char);
 599             if (characters) {
 600                 if (totalBytesWritten > max) { // insufficient buffer
 601                     totalBytesWritten = 0;
 602                 } else {
 603                     *(characters++) = 0x0000FEFF;
 604                 }
 605             }
 606         }
 607
 608         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
 609         while (numCharsProcessed < rangeLen) {
 610             character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
 611
 612             if (CFUniCharIsSurrogateHighCharacter(character)) {
 613                 UTF16Char otherCharacter;
 614
 615                 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
 616                     character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
 617                 } else if (lossByte) {
 618                     character = lossByte;
 619                 } else {
 620                     break;
 621                 }
 622             } else if (CFUniCharIsSurrogateLowCharacter(character)) {
 623                 if (lossByte) {
 624                     character = lossByte;
 625                 } else {
 626                     break;
 627                 }
 628             }
 629
 630             totalBytesWritten += sizeof(UTF32Char);
 631
 632             if (characters) {
 633                 if (totalBytesWritten > max) {
 634                     totalBytesWritten -= sizeof(UTF32Char);
 635                     break;
 636                 }
 637                 *(characters++) = (swap ? CFSwapInt32(character) : character);
 638             }
 639
 640             numCharsProcessed += (character > 0xFFFF ? 2 : 1);
 641         }
 642     } else {
 643         CFIndex numChars;
 644         UInt32 flags;
 645         const unsigned char *cString = NULL;
 646         Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
 647
 648         if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
 649
 650         if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
 651             const unsigned char *ptr;
 652             if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 653                 ptr = (cString += rangeLoc);
 654                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 655                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 656                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 657                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 658                     return numCharsProcessed;
 659                 }
 660
 661                 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
 662                 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
 663                     ++ptr;
 664                     --rangeLen;
 665                 }
 666                 numCharsProcessed = ptr - cString;
 667                 if (buffer) {
 668                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 669                     memmove(buffer, cString, numCharsProcessed);
 670                     buffer += numCharsProcessed;
 671                     max -= numCharsProcessed;
 672                 }
 673                 if (!rangeLen || (buffer && (max == 0))) {
 674                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 675                     return numCharsProcessed;
 676                 }
 677                 rangeLoc += numCharsProcessed;
 678                 totalBytesWritten += numCharsProcessed;
 679             }
 680             if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 681                 ptr = (cString += (rangeLoc + 1));
 682                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 683                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 684                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 685                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 686                     return numCharsProcessed;
 687                 }
 688                 while (*ptr < 0x80 && rangeLen > 0) {
 689                     ++ptr;
 690                     --rangeLen;
 691                 }
 692                 numCharsProcessed = ptr - cString;
 693                 if (buffer) {
 694                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 695                     memmove(buffer, cString, numCharsProcessed);
 696                     buffer += numCharsProcessed;
 697                     max -= numCharsProcessed;
 698                 }
 699                 if (!rangeLen || (buffer && (max == 0))) {
 700                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 701                     return numCharsProcessed;
 702                 }
 703                 rangeLoc += numCharsProcessed;
 704                 totalBytesWritten += numCharsProcessed;
 705             }
 706         }
 707
 708         if (!buffer) max = 0;
 709
 710         // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
 711         // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
 712         flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
 713
 714         if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
 715             CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
 716         } else {
 717             UniChar charBuf[kCFCharConversionBufferLength];
 718             CFIndex currentLength;
 719             CFIndex usedLen;
 720             CFIndex lastUsedLen = 0, lastNumChars = 0;
 721             uint32_t result;
 722             uint32_t streamingMask;
 723             uint32_t streamID = 0;
 724 #define MAX_DECOMP_LEN (6)
 725
 726             while (rangeLen > 0) {
 727                 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
 728                 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
 729
 730                 // could be in the middle of surrogate pair; back up.
 731                 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
 732
 733                 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
 734
 735                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
 736                 streamID = CFStringEncodingStreamIDFromMask(result);
 737                 result &= ~CFStringEncodingStreamIDMask;
 738
 739                 if (result != kCFStringEncodingConversionSuccess) {
 740                     if (kCFStringEncodingInvalidInputStream == result) {
 741                         CFRange composedRange;
 742                         // Check the tail
 743                         if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
 744                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
 745
 746                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
 747                                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
 748                                 streamID = CFStringEncodingStreamIDFromMask(result);
 749                                 result &= ~CFStringEncodingStreamIDMask;
 750                             }
 751                         }
 752
 753                         // Check the head
 754                         if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
 755                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
 756
 757                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
 758                                 // Try if the composed range can be converted
 759                                 CFStringGetCharacters(string, composedRange, charBuf);
 760
 761                                 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 762                                     CFIndex lastRangeLoc = rangeLoc - lastNumChars;
 763
 764                                     currentLength = composedRange.location - lastRangeLoc;
 765                                     CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
 766
 767                                     result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
 768                                     streamID = CFStringEncodingStreamIDFromMask(result);
 769                                     result &= ~CFStringEncodingStreamIDMask;
 770
 771                                     if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 772                                         // Looks good. back up
 773                                         totalBytesWritten -= lastUsedLen;
 774                                         numCharsProcessed -= lastNumChars;
 775
 776                                         rangeLoc = lastRangeLoc;
 777                                         rangeLen += lastNumChars;
 778
 779                                         if (max) {
 780                                             buffer -= lastUsedLen;
 781                                             max += lastUsedLen;
 782                                         }
 783                                     }
 784                                 }
 785                             }
 786                         }
 787                     }
 788
 789                     if (kCFStringEncodingConversionSuccess != result) { // really failed
 790                         totalBytesWritten += usedLen;
 791                         numCharsProcessed += numChars;
 792                         break;
 793                     }
 794                 }
 795
 796                 totalBytesWritten += usedLen;
 797                 numCharsProcessed += numChars;
 798
 799                 rangeLoc += numChars;
 800                 rangeLen -= numChars;
 801                 if (max) {
 802                     buffer += usedLen;
 803                     max -= usedLen;
 804                     if (max <= 0) break;
 805                 }
 806                 lastUsedLen = usedLen; lastNumChars = numChars;
 807                 flags &= ~kCFStringEncodingPrependBOM;
 808             }
 809         }
 810     }
 811     if (usedBufLen) *usedBufLen = totalBytesWritten;
 812     return numCharsProcessed;
 813 }
 814
 815 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
 816     return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
 817 }
 818
 819 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
 820     CFIndex len = CFStringGetLength(string);
 821     CFStringEncoding enc = CFStringGetFastestEncoding(string);
 822     switch (enc) {
 823         case kCFStringEncodingASCII:
 824         case kCFStringEncodingMacRoman:
 825             if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound;     // Avoid wrap-around
 826             return len * 3L + 1L;
 827         default:
 828             if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound;     // Avoid wrap-around
 829             return len * 9L + 1L;
 830     }
 831 }
 832
 833 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
 834 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
 835 #define MAX_STACK_BUFFER_LEN    (255)
 836     const UTF16Char *characters = CFStringGetCharactersPtr(string);
 837     const char *origBuffer = buffer;
 838     const char *bufferLimit = buffer + maxBufLen;
 839     CFIndex length = CFStringGetLength(string);
 840     CFIndex usedBufLen;
 841
 842     if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
 843
 844     if (NULL == characters) {
 845         UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
 846         CFRange range = CFRangeMake(0, 0);
 847         const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
 848
 849         if (NULL != bytes) {
 850             const char *originalBytes = bytes;
 851             const char *bytesLimit = bytes + length;
 852
 853             while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
 854
 855             range.location = bytes - originalBytes;
 856         }
 857         while ((range.location < length) && (buffer < bufferLimit)) {
 858             range.length = length - range.location;
 859             if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
 860
 861             CFStringGetCharacters(string, range, charactersBuffer);
 862             if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
 863
 864             if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 865
 866             buffer += usedBufLen;
 867             range.location += range.length;
 868         }
 869     } else {
 870         if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 871         buffer += usedBufLen;
 872     }
 873
 874     if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
 875         *buffer = '\0';
 876         if (_CFExecutableLinkedOnOrAfter(CFSystemVersionLion)) {
 877             while (origBuffer < buffer) if (*origBuffer++ == 0) {       // There's a zero in there. Now see if the rest are all zeroes.
 878                 while (origBuffer < buffer) if (*origBuffer++ != 0) return false;       // Embedded NULLs should cause failure: <rdar://problem/5863219>
 879             }
 880         }
 881         return true;
 882     } else {
 883         return false;
 884     }
 885 #else
 886     return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
 887 #endif
 888 }
 889
 890 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
 891     return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
 892 }
 893
 894
 895 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
 896
 897 /* This function is used to obtain users' default script/region code.
 898    The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
 899 */
 900 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
 901     char *stringValue;
 902     char buffer[__kCFMaxDefaultEncodingFileLength];
 903     int uid = getuid();
 904
 905     if ((stringValue = (char *)__CFgetenv(__kCFUserEncodingEnvVariableName)) != NULL) {
 906         if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
 907             ++stringValue;
 908         } else {
 909             stringValue = NULL;
 910         }
 911     }
 912
 913     if ((stringValue == NULL) && ((uid > 0) || __CFgetenv("HOME"))) {
 914         char passwdExtraBuf[1000 + MAXPATHLEN];  // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
 915         struct passwd passwdBuf, *passwdp = NULL;
 916
 917         switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
 918             case 0:         // Success
 919                 break;
 920             case ERANGE:    // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
 921                 passwdp = getpwuid((uid_t)uid);
 922                 break;
 923             default:
 924                 passwdp = NULL;
 925         }
 926         if (passwdp) {
 927             char filename[MAXPATHLEN + 1];
 928
 929             const char *path = NULL;
 930             if (!issetugid()) {
 931                 path = __CFgetenv("CFFIXED_USER_HOME");
 932             }
 933             if (!path) {
 934                 path = passwdp->pw_dir;
 935             }
 936
 937             strlcpy(filename, path, sizeof(filename));
 938             strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
 939
 940             int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
 941             int fd = open(filename, O_RDONLY, 0);
 942             if (fd == -1) {
 943                 // Cannot open the file. Let's fallback to smRoman/verUS
 944                 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
 945                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
 946             } else {
 947                 ssize_t readSize;
 948                 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
 949                 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
 950                 close(fd);
 951                 stringValue = buffer;
 952
 953                 // Well, we already have a buffer, let's reuse it
 954                 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
 955                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
 956             }
 957             if (-1 != no_hang_fd) close(no_hang_fd);
 958         }
 959     }
 960
 961     if (stringValue) {
 962         *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
 963         if (*stringValue == ':') {
 964             if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
 965             return;
 966         }
 967     }
 968
 969     // Falling back
 970     *oScriptValue = 0; // smRoman
 971     if (oRegionValue) *oRegionValue = 0; // verUS
 972 }
 973
 974 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
 975     char buffer[__kCFMaxDefaultEncodingFileLength];
 976     char *stringValue = NULL;
 977
 978     *encoding = 0;
 979     *region = 0;
 980
 981     struct passwd *passwdp = getpwuid((uid_t)0);
 982     if (passwdp) {
 983         const char *path = passwdp->pw_dir;
 984
 985         char filename[MAXPATHLEN + 1];
 986         strlcpy(filename, path, sizeof(filename));
 987         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
 988
 989         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
 990         int fd = open(filename, O_RDONLY, 0);
 991         if (0 <= fd) {
 992             ssize_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
 993             buffer[(size < 0 ? 0 : size)] = '\0';
 994             close(fd);
 995             stringValue = buffer;
 996         }
 997         if (-1 != no_hang_fd) close(no_hang_fd);
 998     }
 999
1000     if (stringValue) {
1001         *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
1002         if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
1003     }
1004 }
1005
1006 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1007     Boolean success = false;
1008     struct passwd *passwdp = getpwuid(getuid());
1009     if (passwdp) {
1010         const char *path = passwdp->pw_dir;
1011         if (!issetugid()) {
1012             const char *value = __CFgetenv("CFFIXED_USER_HOME");
1013             if (value) path = value; // override
1014         }
1015
1016         char filename[MAXPATHLEN + 1];
1017         strlcpy(filename, path, sizeof(filename));
1018         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1019
1020         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1021         (void)unlink(filename);
1022         int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1023         if (0 <= fd) {
1024             char buffer[__kCFMaxDefaultEncodingFileLength];
1025             size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1026             if (size <= __kCFMaxDefaultEncodingFileLength) {
1027                 int ret = write(fd, buffer, size);
1028                 if (size <= ret) success = true;
1029             }
1030             int save_err = errno;
1031             close(fd);
1032             errno = save_err;
1033         }
1034         int save_err = errno;
1035         if (-1 != no_hang_fd) close(no_hang_fd);
1036         errno = save_err;
1037     }
1038     return success;
1039 }
1040
1041 #endif
1042