CFStringEncodings.c

   1 /*
   2  * Copyright (c) 2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23 /*      CFStringEncodings.c
  24         Copyright (c) 1999-2009, Apple Inc. All rights reserved.
  25         Responsibility: Aki Inoue
  26 */
  27
  28 #include "CFInternal.h"
  29 #include <CoreFoundation/CFString.h>
  30 #include <CoreFoundation/CFByteOrder.h>
  31 #include <CoreFoundation/CFPriv.h>
  32 #include <string.h>
  33 #include <CoreFoundation/CFStringEncodingConverterExt.h>
  34 #include <CoreFoundation/CFUniChar.h>
  35 #include <CoreFoundation/CFUnicodeDecomposition.h>
  36 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
  37 #include <stdlib.h>
  38 #include <fcntl.h>
  39 #include <pwd.h>
  40 #include <sys/param.h>
  41 #include <unistd.h>
  42 #include <string.h>
  43 #include <stdio.h>
  44 #include <xlocale.h>
  45 #include <CoreFoundation/CFStringDefaultEncoding.h>
  46 #endif
  47
  48 static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
  49 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
  50     if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
  51         __CFWantsToUseASCIICompatibleConversion = false;
  52     }
  53     return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
  54 }
  55
  56 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
  57     __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
  58 }
  59
  60 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
  61
  62 // To avoid early initialization issues, we just initialize this here
  63 // This should not be const as it is changed
  64 __private_extern__ UniChar __CFCharToUniCharTable[256] = {
  65   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  66  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  67  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
  68  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
  69  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
  70  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
  71  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
  72 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
  73 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  74 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  75 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  76 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  77 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
  78 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
  79 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
  80 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  81 };
  82
  83 __private_extern__ void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
  84     if (__CFCharToUniCharFunc != func) {
  85         int ch;
  86         __CFCharToUniCharFunc = func;
  87         if (func) {
  88             for (ch = 128; ch < 256; ch++) {
  89                 UniChar uch;
  90                 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
  91             }
  92         } else {        // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
  93             for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
  94         }
  95     }
  96 }
  97
  98 __private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
  99     CFIndex idx;
 100     for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
 101 }
 102
 103
 /* The minimum length the output buffers used with the conversion functions above should have.
*/
#define kCFCharConversionBufferLength 512


/* Capacity of the inline storage of a CFVarWidthCharBuffer, counted in bytes
   (MAX_LOCAL_CHARS) and in UniChars (MAX_LOCAL_UNICHARS). Both macros expand to an
   expression that refers to a variable named `buffer`, which must be in scope where
   they are used.
*/
#define MAX_LOCAL_CHARS         (sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS      (sizeof(buffer->localBuffer) / sizeof(UniChar))

 111
 112 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
 113 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
 114 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
 115 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
 116 !!! converterFlags is only used for the UTF8 converter at this point
 117 */
 118 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
 119     return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
 120 }
 121
 /* States of the kCFStringEncodingNonLossyASCII decoder. The hex and octal states are
   counters: the decoder increments the mode once per digit consumed and returns to
   __NSNonLossyASCIIMode when the matching "Final" value is reached.
*/
enum {
    __NSNonLossyErrorMode        = -1,  /* malformed input; decoding stops */
    __NSNonLossyASCIIMode        = 0,   /* between escapes; a character has just been completed */
    __NSNonLossyBackslashMode    = 1,   /* a backslash has been seen */
    __NSNonLossyHexInitialMode   = 2,   /* after "\u" or "\U"; __NSNonLossyBackslashMode + 1 */
    __NSNonLossyHexFinalMode     = 6,   /* __NSNonLossyHexInitialMode + 4 hex digits */
    __NSNonLossyOctalInitialMode = 7,   /* first digit after the backslash seen; __NSNonLossyHexFinalMode + 1 */
    __NSNonLossyOctalFinalMode   = 9    /* two further digits; __NSNonLossyHexFinalMode + 3 */
};
 131
 132 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
 133     CFIndex idx;
 134     const uint8_t *chars = (const uint8_t *)bytes;
 135     const uint8_t *end = chars + len;
 136     Boolean result = TRUE;
 137
 138     if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
 139
 140     buffer->isASCII = !alwaysUnicode;
 141     buffer->shouldFreeChars = false;
 142     buffer->numChars = 0;
 143
 144     if (0 == len) return true;
 145
 146     buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
 147
 148     if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
 149         const UTF16Char *src = (const UTF16Char *)bytes;
 150         const UTF16Char *limit = (const UTF16Char *)(bytes + len);
 151         bool swap = false;
 152
 153         if (kCFStringEncodingUTF16 == encoding) {
 154             UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
 155
 156 #if __CF_BIG_ENDIAN__
 157             if (bom == 0xFFFE) swap = true;
 158 #else
 159             if (bom != 0xFEFF) swap = true;
 160 #endif
 161             if (bom) useClientsMemoryPtr = NULL;
 162         } else {
 163 #if __CF_BIG_ENDIAN__
 164             if (kCFStringEncodingUTF16LE == encoding) swap = true;
 165 #else
 166             if (kCFStringEncodingUTF16BE == encoding) swap = true;
 167 #endif
 168         }
 169
 170         buffer->numChars = limit - src;
 171
 172         if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
 173             *useClientsMemoryPtr = true;
 174             buffer->chars.unicode = (UniChar *)src;
 175             buffer->isASCII = false;
 176         } else {
 177             if (buffer->isASCII) {      // Let's see if we can reduce the Unicode down to ASCII...
 178                 const UTF16Char *characters = src;
 179                 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
 180
 181                 while (characters < limit) {
 182                     if (*(characters++) & mask) {
 183                         buffer->isASCII = false;
 184                         break;
 185                     }
 186                 }
 187             }
 188
 189             if (buffer->isASCII) {
 190                 uint8_t *dst;
 191                 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
 192                     if (buffer->numChars > MAX_LOCAL_CHARS) {
 193                         buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
 194                         if (!buffer->chars.ascii) goto memoryErrorExit;
 195                         buffer->shouldFreeChars = true;
 196                     } else {
 197                         buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
 198                     }
 199                 }
 200                 dst = buffer->chars.ascii;
 201
 202                 if (swap) {
 203                     while (src < limit) *(dst++) = (*(src++) >> 8);
 204                 } else {
 205                     while (src < limit) *(dst++) = (uint8_t)*(src++);
 206                 }
 207             } else {
 208                 UTF16Char *dst;
 209
 210                 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
 211                     if (buffer->numChars > MAX_LOCAL_UNICHARS) {
 212                         buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
 213                         if (!buffer->chars.unicode) goto memoryErrorExit;
 214                         buffer->shouldFreeChars = true;
 215                     } else {
 216                         buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
 217                     }
 218                 }
 219                 dst = buffer->chars.unicode;
 220
 221                 if (swap) {
 222                     while (src < limit) *(dst++) = CFSwapInt16(*(src++));
 223                 } else {
 224                     memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
 225                 }
 226             }
 227         }
 228     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
 229         const UTF32Char *src = (const UTF32Char *)bytes;
 230         const UTF32Char *limit = (const UTF32Char *)(bytes + len);
 231         bool swap = false;
 232         static bool strictUTF32 = (bool)-1;
 233
 234         if ((bool)-1 == strictUTF32) strictUTF32 = (_CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) != 0);
 235
 236         if (kCFStringEncodingUTF32 == encoding) {
 237             UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
 238
 239 #if __CF_BIG_ENDIAN__
 240             if (bom == 0xFFFE0000) swap = true;
 241 #else
 242             if (bom != 0x0000FEFF) swap = true;
 243 #endif
 244         } else {
 245 #if __CF_BIG_ENDIAN__
 246             if (kCFStringEncodingUTF32LE == encoding) swap = true;
 247 #else
 248             if (kCFStringEncodingUTF32BE == encoding) swap = true;
 249 #endif
 250         }
 251
 252         buffer->numChars = limit - src;
 253
 254         {
 255             // Let's see if we have non-ASCII or non-BMP
 256             const UTF32Char *characters = src;
 257             UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
 258             UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
 259
 260             while (characters < limit) {
 261                 if (*characters & asciiMask) {
 262                     buffer->isASCII = false;
 263                     if (*characters & bmpMask) {
 264                         if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
 265                         ++(buffer->numChars);
 266                     }
 267                 }
 268                 ++characters;
 269             }
 270         }
 271
 272         if (buffer->isASCII) {
 273             uint8_t *dst;
 274             if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
 275                 if (buffer->numChars > MAX_LOCAL_CHARS) {
 276                     buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
 277                     if (!buffer->chars.ascii) goto memoryErrorExit;
 278                     buffer->shouldFreeChars = true;
 279                 } else {
 280                     buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
 281                 }
 282             }
 283             dst = buffer->chars.ascii;
 284
 285             if (swap) {
 286                 while (src < limit) *(dst++) = (*(src++) >> 24);
 287             } else {
 288                 while (src < limit) *(dst++) = *(src++);
 289             }
 290         } else {
 291             if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
 292                 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
 293                     buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
 294                     if (!buffer->chars.unicode) goto memoryErrorExit;
 295                     buffer->shouldFreeChars = true;
 296                 } else {
 297                     buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
 298                 }
 299             }
 300             result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
 301         }
 302     } else if (kCFStringEncodingUTF8 == encoding) {
 303         if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) {     // If UTF8 BOM, skip
 304             chars += 3;
 305             len -= 3;
 306             if (0 == len) return true;
 307         }
 308         if (buffer->isASCII) {
 309             for (idx = 0; idx < len; idx++) {
 310                 if (128 <= chars[idx]) {
 311                     buffer->isASCII = false;
 312                     break;
 313                 }
 314             }
 315         }
 316         if (buffer->isASCII) {
 317             buffer->numChars = len;
 318             buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 319             buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 320             if (!buffer->chars.ascii) goto memoryErrorExit;
 321             memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 322         } else {
 323             CFIndex numDone;
 324             static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
 325
 326             if (!__CFFromUTF8) {
 327                 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 328                 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
 329             }
 330
 331             buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 332             buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 333             if (!buffer->chars.unicode) goto memoryErrorExit;
 334             buffer->numChars = 0;
 335             while (chars < end) {
 336                 numDone = 0;
 337                 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
 338
 339                 if (0 == numDone) {
 340                     result = FALSE;
 341                     break;
 342                 }
 343                 buffer->numChars += numDone;
 344             }
 345         }
 346     } else if (kCFStringEncodingNonLossyASCII == encoding) {
 347         UTF16Char currentValue = 0;
 348         uint8_t character;
 349         int8_t mode = __NSNonLossyASCIIMode;
 350
 351         buffer->isASCII = false;
 352         buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 353         buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 354         if (!buffer->chars.unicode) goto memoryErrorExit;
 355         buffer->numChars = 0;
 356
 357         while (chars < end) {
 358             character = (*chars++);
 359
 360             switch (mode) {
 361                 case __NSNonLossyASCIIMode:
 362                     if (character == '\\') {
 363                         mode = __NSNonLossyBackslashMode;
 364                     } else if (character < 0x80) {
 365                         currentValue = character;
 366                     } else {
 367                         mode = __NSNonLossyErrorMode;
 368                     }
 369                     break;
 370
 371                     case __NSNonLossyBackslashMode:
 372                     if ((character == 'U') || (character == 'u')) {
 373                         mode = __NSNonLossyHexInitialMode;
 374                         currentValue = 0;
 375                     } else if ((character >= '0') && (character <= '9')) {
 376                         mode = __NSNonLossyOctalInitialMode;
 377                         currentValue = character - '0';
 378                     } else if (character == '\\') {
 379                         mode = __NSNonLossyASCIIMode;
 380                         currentValue = character;
 381                     } else {
 382                         mode = __NSNonLossyErrorMode;
 383                     }
 384                     break;
 385
 386                     default:
 387                     if (mode < __NSNonLossyHexFinalMode) {
 388                         if ((character >= '0') && (character <= '9')) {
 389                             currentValue = (currentValue << 4) | (character - '0');
 390                             if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 391                         } else {
 392                             if (character >= 'a') character -= ('a' - 'A');
 393                             if ((character >= 'A') && (character <= 'F')) {
 394                                 currentValue = (currentValue << 4) | ((character - 'A') + 10);
 395                                 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 396                             } else {
 397                                 mode = __NSNonLossyErrorMode;
 398                             }
 399                         }
 400                     } else {
 401                         if ((character >= '0') && (character <= '9')) {
 402                             currentValue = (currentValue << 3) | (character - '0');
 403                             if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
 404                         } else {
 405                             mode = __NSNonLossyErrorMode;
 406                         }
 407                     }
 408                     break;
 409             }
 410
 411             if (mode == __NSNonLossyASCIIMode) {
 412                 buffer->chars.unicode[buffer->numChars++] = currentValue;
 413             } else if (mode == __NSNonLossyErrorMode) {
 414                 break;
 415             }
 416         }
 417         result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
 418     } else {
 419         const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
 420
 421         if (!converter) return false;
 422
 423         Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
 424
 425         if (!isASCIISuperset) buffer->isASCII = false;
 426
 427         if (buffer->isASCII) {
 428             for (idx = 0; idx < len; idx++) {
 429                 if (128 <= chars[idx]) {
 430                     buffer->isASCII = false;
 431                     break;
 432                 }
 433             }
 434         }
 435
 436         if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
 437             if (buffer->isASCII) {
 438                 buffer->numChars = len;
 439                 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 440                 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 441                 if (!buffer->chars.ascii) goto memoryErrorExit;
 442                 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 443             } else {
 444                 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 445                 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 446                 if (!buffer->chars.unicode) goto memoryErrorExit;
 447                 buffer->numChars = len;
 448                 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
 449                     for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
 450                 } else {
 451                     for (idx = 0; idx < len; idx++) {
 452                         if (chars[idx] < 0x80 && isASCIISuperset) {
 453                             buffer->chars.unicode[idx] = (UniChar)chars[idx];
 454                         } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
 455                             result = FALSE;
 456                             break;
 457                         }
 458                     }
 459                 }
 460             }
 461         } else {
 462             if (buffer->isASCII) {
 463                 buffer->numChars = len;
 464                 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 465                 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 466                 if (!buffer->chars.ascii) goto memoryErrorExit;
 467                 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 468             } else {
 469                 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
 470                 static UInt32 lossyFlag = (UInt32)-1;
 471
 472                 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
 473                 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
 474                 if (!buffer->chars.unicode) goto memoryErrorExit;
 475
 476                 if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
 477
 478                 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
 479             }
 480         }
 481     }
 482
 483     if (FALSE == result) {
 484 memoryErrorExit:        // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
 485         result = FALSE; // In case we come here from a goto
 486         if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
 487         buffer->isASCII = !alwaysUnicode;
 488         buffer->shouldFreeChars = false;
 489         buffer->chars.ascii = NULL;
 490         buffer->numChars = 0;
 491     }
 492     return result;
 493 }
 494
 495
 496 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
 497    into a fixed size buffer. Returns number of characters converted.
 498    Characters that cannot be converted to the specified encoding are represented
 499    with the char specified by lossByte; if 0, then lossy conversion is not allowed
 500    and conversion stops, returning partial results.
 501    Pass buffer==NULL if you don't care about the converted string (but just the convertability,
 502    or number of bytes required, indicated by usedBufLen).
 503    Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
 504
 505    Note: This function is intended to work through CFString functions, so it should work
 506    with NSStrings as well as CFStrings.
 507 */
 508 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
 509     CFIndex totalBytesWritten = 0;      /* Number of written bytes */
 510     CFIndex numCharsProcessed = 0;      /* Number of processed chars */
 511     const UniChar *unichars;
 512
 513     if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
 514         static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
 515
 516         if (!__CFToUTF8) {
 517             const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 518             __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
 519         }
 520         numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
 521
 522     } else if (encoding == kCFStringEncodingNonLossyASCII) {
 523         const char *hex = "0123456789abcdef";
 524         UniChar ch;
 525         CFStringInlineBuffer buf;
 526         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
 527         while (numCharsProcessed < rangeLen) {
 528             CFIndex reqLength; /* Required number of chars to encode this UniChar */
 529             CFIndex cnt;
 530             char tmp[6];
 531             ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
 532             if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
 533                 reqLength = 1;
 534                 tmp[0] = (char)ch;
 535             } else {
 536                 if (ch == '\\') {
 537                     tmp[1] = '\\';
 538                     reqLength = 2;
 539                 } else if (ch < 256) {  /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
 540                     tmp[1] = '0' + (ch >> 6);
 541                     tmp[2] = '0' + ((ch >> 3) & 7);
 542                     tmp[3] = '0' + (ch & 7);
 543                     reqLength = 4;
 544                 } else {        /* \Unnnn */
 545                     tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
 546                     tmp[2] = hex[(ch >> 12) & 0x0f];
 547                     tmp[3] = hex[(ch >> 8) & 0x0f];
 548                     tmp[4] = hex[(ch >> 4) & 0x0f];
 549                     tmp[5] = hex[ch & 0x0f];
 550                     reqLength = 6;
 551                 }
 552                 tmp[0] = '\\';
 553             }
 554             if (buffer) {
 555                 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
 556 .*/
 557                 for (cnt = 0; cnt < reqLength; cnt++) {
 558                     buffer[totalBytesWritten + cnt] = tmp[cnt];
 559                 }
 560             }
 561             totalBytesWritten += reqLength;
 562             numCharsProcessed++;
 563         }
 564     } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
 565         CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
 566         numCharsProcessed = rangeLen;
 567         if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
 568             numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
 569         }
 570         totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
 571         if (buffer) {
 572             if (extraForBOM) {  /* Generate BOM */
 573 #if __CF_BIG_ENDIAN__
 574                 *buffer++ = 0xfe; *buffer++ = 0xff;
 575 #else
 576                 *buffer++ = 0xff; *buffer++ = 0xfe;
 577 #endif
 578             }
 579             CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
 580             if ((__CF_BIG_ENDIAN__ ?  kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
 581                 UTF16Char *characters = (UTF16Char *)buffer;
 582                 const UTF16Char *limit = characters + numCharsProcessed;
 583
 584                 while (characters < limit) {
 585                     *characters = CFSwapInt16(*characters);
 586                     ++characters;
 587                 }
 588             }
 589         }
 590     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
 591         UTF32Char character;
 592         CFStringInlineBuffer buf;
 593         UTF32Char *characters = (UTF32Char *)buffer;
 594
 595         bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
 596         if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
 597             totalBytesWritten += sizeof(UTF32Char);
 598             if (characters) {
 599                 if (totalBytesWritten > max) { // insufficient buffer
 600                     totalBytesWritten = 0;
 601                 } else {
 602                     *(characters++) = 0x0000FEFF;
 603                 }
 604             }
 605         }
 606
 607         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
 608         while (numCharsProcessed < rangeLen) {
 609             character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
 610
 611             if (CFUniCharIsSurrogateHighCharacter(character)) {
 612                 UTF16Char otherCharacter;
 613
 614                 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
 615                     character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
 616                 } else if (lossByte) {
 617                     character = lossByte;
 618                 } else {
 619                     break;
 620                 }
 621             } else if (CFUniCharIsSurrogateLowCharacter(character)) {
 622                 if (lossByte) {
 623                     character = lossByte;
 624                 } else {
 625                     break;
 626                 }
 627             }
 628
 629             totalBytesWritten += sizeof(UTF32Char);
 630
 631             if (characters) {
 632                 if (totalBytesWritten > max) {
 633                     totalBytesWritten -= sizeof(UTF32Char);
 634                     break;
 635                 }
 636                 *(characters++) = (swap ? CFSwapInt32(character) : character);
 637             }
 638
 639             numCharsProcessed += (character > 0xFFFF ? 2 : 1);
 640         }
 641     } else {
 642         CFIndex numChars;
 643         UInt32 flags;
 644         const unsigned char *cString = NULL;
 645         Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
 646
 647         if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
 648
 649         if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
 650             const unsigned char *ptr;
 651             if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 652                 ptr = (cString += rangeLoc);
 653                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 654                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 655                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 656                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 657                     return numCharsProcessed;
 658                 }
 659
 660                 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
 661                 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
 662                     ++ptr;
 663                     --rangeLen;
 664                 }
 665                 numCharsProcessed = ptr - cString;
 666                 if (buffer) {
 667                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 668                     memmove(buffer, cString, numCharsProcessed);
 669                     buffer += numCharsProcessed;
 670                     max -= numCharsProcessed;
 671                 }
 672                 if (!rangeLen || (buffer && (max == 0))) {
 673                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 674                     return numCharsProcessed;
 675                 }
 676                 rangeLoc += numCharsProcessed;
 677                 totalBytesWritten += numCharsProcessed;
 678             }
 679             if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 680                 ptr = (cString += (rangeLoc + 1));
 681                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 682                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 683                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 684                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 685                     return numCharsProcessed;
 686                 }
 687                 while (*ptr < 0x80 && rangeLen > 0) {
 688                     ++ptr;
 689                     --rangeLen;
 690                 }
 691                 numCharsProcessed = ptr - cString;
 692                 if (buffer) {
 693                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 694                     memmove(buffer, cString, numCharsProcessed);
 695                     buffer += numCharsProcessed;
 696                     max -= numCharsProcessed;
 697                 }
 698                 if (!rangeLen || (buffer && (max == 0))) {
 699                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 700                     return numCharsProcessed;
 701                 }
 702                 rangeLoc += numCharsProcessed;
 703                 totalBytesWritten += numCharsProcessed;
 704             }
 705         }
 706
 707         if (!buffer) max = 0;
 708
 709         // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
 710         // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
 711         flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
 712
 713         if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
 714             CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
 715         } else {
 716             UniChar charBuf[kCFCharConversionBufferLength];
 717             CFIndex currentLength;
 718             CFIndex usedLen;
 719             CFIndex lastUsedLen = 0, lastNumChars = 0;
 720             uint32_t result;
 721             uint32_t streamingMask;
 722             uint32_t streamID = 0;
 723 #define MAX_DECOMP_LEN (6)
 724
 725             while (rangeLen > 0) {
 726                 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
 727                 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
 728
 729                 // could be in the middle of surrogate pair; back up.
 730                 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
 731
 732                 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
 733
 734                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
 735                 streamID = CFStringEncodingStreamIDFromMask(result);
 736                 result &= ~CFStringEncodingStreamIDMask;
 737
 738                 if (result != kCFStringEncodingConversionSuccess) {
 739                     if (kCFStringEncodingInvalidInputStream == result) {
 740                         CFRange composedRange;
 741                         // Check the tail
 742                         if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
 743                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
 744
 745                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
 746                                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
 747                                 streamID = CFStringEncodingStreamIDFromMask(result);
 748                                 result &= ~CFStringEncodingStreamIDMask;
 749                             }
 750                         }
 751
 752                         // Check the head
 753                         if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
 754                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
 755
 756                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
 757                                 // Try if the composed range can be converted
 758                                 CFStringGetCharacters(string, composedRange, charBuf);
 759
 760                                 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 761                                     CFIndex lastRangeLoc = rangeLoc - lastNumChars;
 762
 763                                     currentLength = composedRange.location - lastRangeLoc;
 764                                     CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
 765
 766                                     result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
 767                                     streamID = CFStringEncodingStreamIDFromMask(result);
 768                                     result &= ~CFStringEncodingStreamIDMask;
 769
 770                                     if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 771                                         // Looks good. back up
 772                                         totalBytesWritten -= lastUsedLen;
 773                                         numCharsProcessed -= lastNumChars;
 774
 775                                         rangeLoc = lastRangeLoc;
 776                                         rangeLen += lastNumChars;
 777
 778                                         if (max) {
 779                                             buffer -= lastUsedLen;
 780                                             max += lastUsedLen;
 781                                         }
 782                                     }
 783                                 }
 784                             }
 785                         }
 786                     }
 787
 788                     if (kCFStringEncodingConversionSuccess != result) { // really failed
 789                         totalBytesWritten += usedLen;
 790                         numCharsProcessed += numChars;
 791                         break;
 792                     }
 793                 }
 794
 795                 totalBytesWritten += usedLen;
 796                 numCharsProcessed += numChars;
 797
 798                 rangeLoc += numChars;
 799                 rangeLen -= numChars;
 800                 if (max) {
 801                     buffer += usedLen;
 802                     max -= usedLen;
 803                     if (max <= 0) break;
 804                 }
 805                 lastUsedLen = usedLen; lastNumChars = numChars;
 806                 flags &= ~kCFStringEncodingPrependBOM;
 807             }
 808         }
 809     }
 810     if (usedBufLen) *usedBufLen = totalBytesWritten;
 811     return numCharsProcessed;
 812 }
 813
 814 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
 815     return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
 816 }
 817
 818 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
 819     CFIndex len = CFStringGetLength(string);
 820     CFStringEncoding enc = CFStringGetFastestEncoding(string);
 821     switch (enc) {
 822         case kCFStringEncodingASCII:
 823         case kCFStringEncodingMacRoman:
 824             if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound;     // Avoid wrap-around
 825             return len * 3L + 1L;
 826         default:
 827             if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound;     // Avoid wrap-around
 828             return len * 9L + 1L;
 829     }
 830 }
 831
 832 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
 833 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
 834 #define MAX_STACK_BUFFER_LEN    (255)
 835     const UTF16Char *characters = CFStringGetCharactersPtr(string);
 836     const char *bufferLimit = buffer + maxBufLen;
 837     CFIndex length = CFStringGetLength(string);
 838     CFIndex usedBufLen;
 839
 840     if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
 841
 842     if (NULL == characters) {
 843         UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
 844         CFRange range = CFRangeMake(0, 0);
 845         const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
 846
 847         if (NULL != bytes) {
 848             const char *originalBytes = bytes;
 849             const char *bytesLimit = bytes + length;
 850
 851             while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
 852
 853             range.location = bytes - originalBytes;
 854         }
 855         while ((range.location < length) && (buffer < bufferLimit)) {
 856             range.length = length - range.location;
 857             if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
 858
 859             CFStringGetCharacters(string, range, charactersBuffer);
 860             if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
 861
 862             if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 863
 864             buffer += usedBufLen;
 865             range.location += range.length;
 866         }
 867     } else {
 868         if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 869         buffer += usedBufLen;
 870     }
 871
 872     if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
 873         *buffer = '\0';
 874         return true;
 875     } else {
 876         return false;
 877     }
 878 #else
 879     return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
 880 #endif
 881 }
 882
 883 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
 884     return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
 885 }
 886
 887
 888 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
 889
 890 /* This function is used to obtain users' default script/region code.
 891    The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
 892 */
 893 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
 894     char *stringValue;
 895     char buffer[__kCFMaxDefaultEncodingFileLength];
 896     int uid = getuid();
 897
 898     if ((stringValue = getenv(__kCFUserEncodingEnvVariableName)) != NULL) {
 899         if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
 900             ++stringValue;
 901         } else {
 902             stringValue = NULL;
 903         }
 904     }
 905
 906     if ((stringValue == NULL) && ((uid > 0) || getenv("HOME"))) {
 907         char passwdExtraBuf[1000 + MAXPATHLEN];  // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
 908         struct passwd passwdBuf, *passwdp = NULL;
 909
 910         switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
 911             case 0:         // Success
 912                 break;
 913             case ERANGE:    // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
 914                 passwdp = getpwuid((uid_t)uid);
 915                 break;
 916             default:
 917                 passwdp = NULL;
 918         }
 919         if (passwdp) {
 920             char filename[MAXPATHLEN + 1];
 921
 922             const char *path = NULL;
 923             if (!issetugid()) {
 924                 path = getenv("CFFIXED_USER_HOME");
 925             }
 926             if (!path) {
 927                 path = passwdp->pw_dir;
 928             }
 929
 930             strlcpy(filename, path, sizeof(filename));
 931             strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
 932
 933             int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
 934             int fd = open(filename, O_RDONLY, 0);
 935             if (fd == -1) {
 936                 // Cannot open the file. Let's fallback to smRoman/verUS
 937                 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
 938                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
 939             } else {
 940                 int readSize;
 941                 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
 942                 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
 943                 close(fd);
 944                 stringValue = buffer;
 945
 946                 // Well, we already have a buffer, let's reuse it
 947                 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
 948                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
 949             }
 950             if (-1 != no_hang_fd) close(no_hang_fd);
 951         }
 952     }
 953
 954     if (stringValue) {
 955         *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
 956         if (*stringValue == ':') {
 957             if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
 958             return;
 959         }
 960     }
 961
 962     // Falling back
 963     *oScriptValue = 0; // smRoman
 964     if (oRegionValue) *oRegionValue = 0; // verUS
 965 }
 966
 967 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
 968     char buffer[__kCFMaxDefaultEncodingFileLength];
 969     char *stringValue = NULL;
 970
 971     *encoding = 0;
 972     *region = 0;
 973
 974     struct passwd *passwdp = getpwuid((uid_t)0);
 975     if (passwdp) {
 976         const char *path = passwdp->pw_dir;
 977
 978         char filename[MAXPATHLEN + 1];
 979         strlcpy(filename, path, sizeof(filename));
 980         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
 981
 982         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
 983         int fd = open(filename, O_RDONLY, 0);
 984         if (0 <= fd) {
 985             size_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
 986             buffer[(size < 0 ? 0 : size)] = '\0';
 987             close(fd);
 988             stringValue = buffer;
 989         }
 990         if (-1 != no_hang_fd) close(no_hang_fd);
 991     }
 992
 993     if (stringValue) {
 994         *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
 995         if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
 996     }
 997 }
 998
 999 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1000     Boolean success = false;
1001     struct passwd *passwdp = getpwuid(getuid());
1002     if (passwdp) {
1003         const char *path = passwdp->pw_dir;
1004         if (!issetugid()) {
1005             char *value = getenv("CFFIXED_USER_HOME");
1006             if (value) path = value; // override
1007         }
1008
1009         char filename[MAXPATHLEN + 1];
1010         strlcpy(filename, path, sizeof(filename));
1011         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1012
1013         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1014         (void)unlink(filename);
1015         int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1016         if (0 <= fd) {
1017             char buffer[__kCFMaxDefaultEncodingFileLength];
1018             size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1019             if (size <= __kCFMaxDefaultEncodingFileLength) {
1020                 int ret = write(fd, buffer, size);
1021                 if (size <= ret) success = true;
1022             }
1023             int save_err = errno;
1024             close(fd);
1025             errno = save_err;
1026         }
1027         int save_err = errno;
1028         if (-1 != no_hang_fd) close(no_hang_fd);
1029         errno = save_err;
1030     }
1031     return success;
1032 }
1033
1034 #endif
1035