CFStringEncodings.c

   1 /*
   2  * Copyright (c) 2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 /*      CFStringEncodings.c
  25         Copyright (c) 1999-2014, Apple Inc. All rights reserved.
  26         Responsibility: Aki Inoue
  27 */
  28
  29 #include "CFInternal.h"
  30 #include <CoreFoundation/CFString.h>
  31 #include <CoreFoundation/CFByteOrder.h>
  32 #include <CoreFoundation/CFPriv.h>
  33 #include <string.h>
  34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
  35 #include <CoreFoundation/CFUniChar.h>
  36 #include <CoreFoundation/CFUnicodeDecomposition.h>
  37 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
  38 #include <stdlib.h>
  39 #include <fcntl.h>
  40 #include <pwd.h>
  41 #include <sys/param.h>
  42 #include <unistd.h>
  43 #include <string.h>
  44 #include <stdio.h>
  45 #include <xlocale.h>
  46 #include <CoreFoundation/CFStringDefaultEncoding.h>
  47 #endif
  48
  49 static bool __CFWantsToUseASCIICompatibleConversion = false; // off until _CFStringEncodingSetForceASCIICompatibility turns it on
  50 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) { return (__CFWantsToUseASCIICompatibleConversion ? 1 : 0); } // the switch expressed as a 0/1 flag word
  51
  52 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
  53     __CFWantsToUseASCIICompatibleConversion = (flag != 0); // any non-zero Boolean switches the mode on, zero switches it off
  54 }
  55
  56 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL; // byte-to-UniChar converter hook; starts out NULL and is assigned by __CFSetCharToUniCharFunc
  57
  58 // To avoid early initialization issues, we just initialize this here
  59 // This should not be const because __CFSetCharToUniCharFunc rewrites entries 128..255
     // Lookup table giving the UniChar for each byte value 0..255. It is initialised to the
     // identity mapping (entry i holds i); __CFStrConvertBytesToUnicode reads it per input byte.
  60 CF_PRIVATE UniChar __CFCharToUniCharTable[256] = {
  61   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  62  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  63  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
  64  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
  65  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
  66  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
  67  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
  68 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
  69 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  70 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  71 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  72 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  73 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
  74 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
  75 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
  76 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  77 };
  78
  79 CF_PRIVATE void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
  80     /* Installs func as the byte-to-UniChar converter and refreshes entries 128..255 of
  81        __CFCharToUniCharTable to match; entries 0..127 are never touched here. */
  82     if (__CFCharToUniCharFunc == func) return; // same converter as before: nothing to do
  83     __CFCharToUniCharFunc = func;
  84     for (int byteValue = 128; byteValue < 256; byteValue++) {
  85         UniChar mapped = (UniChar)byteValue; // with no converter, 128..255 map to themselves
  86         if (func) {
  87             UniChar converted;
  88             mapped = (__CFCharToUniCharFunc(0, byteValue, &converted) ? converted : 0xFFFD); // rejected bytes are stored as 0xFFFD
  89         }
  90         __CFCharToUniCharTable[byteValue] = mapped;
  91     }
  92 }
  93
  94 CF_PRIVATE void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
  95     const uint8_t *src = bytes; UniChar *dst = buffer; // one table lookup per input byte; buffer must hold numChars UniChars
  96     for (CFIndex remaining = numChars; remaining > 0; remaining--) *(dst++) = __CFCharToUniCharTable[*(src++)];
  97 }
  98
  99
 100 /* The minimum length the output buffers should be in the above functions.
 101    NOTE(review): this constant is not referenced in the visible part of this file - confirm it is still needed. */
 102 #define kCFCharConversionBufferLength 512
 103
 104
     /* Capacity of the inline buffer->localBuffer, counted in 8-bit characters and in UniChars
        respectively. Both macros expand to an expression that names a variable called `buffer`,
        so they can only be used where such a variable is in scope. */
 105 #define MAX_LOCAL_CHARS         (sizeof(buffer->localBuffer) / sizeof(uint8_t))
 106 #define MAX_LOCAL_UNICHARS      (sizeof(buffer->localBuffer) / sizeof(UniChar))
 107
 108 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. A false return indicates that an error occurred during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
 109 9/18/98 __CFStringDecodeByteStream now avoids allocating a buffer if buffer->chars is not NULL
 110 Added useClientsMemoryPtr; if not NULL, and the provided memory can be used as is, this is set to true
 111 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
 112 !!! converterFlags is only used for the UTF8 converter at this point
 113 */
 114 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
 115     return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0); // forwards unchanged, with converterFlags fixed at 0
 116 }
 117
 118 enum {
     // States of the kCFStringEncodingNonLossyASCII decoder in __CFStringDecodeByteStream3.
 119     __NSNonLossyErrorMode = -1,                                    // invalid input; the decoder stops
 120     __NSNonLossyASCIIMode = 0,                                     // between characters; a finished character is emitted in this state
 121     __NSNonLossyBackslashMode = 1,                                 // a backslash has just been read
 122     __NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,    // == 2: entered after backslash + 'u'/'U'; each hex digit advances the state by one
 123     __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,     // == 6: reached on the fourth hex digit, which returns the decoder to ASCII mode
 124     __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,   // == 7: entered after backslash + first digit
 125     __NSNonLossyOctalFinalMode = __NSNonLossyHexFinalMode + 3      // == 9: reached after two further digits, which returns the decoder to ASCII mode
 126 };
 127
 128 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
     /* Decodes the `len` bytes at `bytes`, interpreted in `encoding`, into `buffer`.
      *
      * Output: buffer->numChars characters, stored as 7-bit bytes in buffer->chars.ascii when
      * buffer->isASCII ends up true, otherwise as UniChars in buffer->chars.unicode. isASCII
      * starts out as !alwaysUnicode and is cleared when the input or the encoding rules out
      * an ASCII result.
      *
      * Storage: a non-NULL buffer->chars on entry is used as is and never reallocated; otherwise
      * buffer->localBuffer is used when the (estimated) result fits, else memory comes from
      * buffer->allocator (the default allocator when that field is NULL).
      *
      * useClientsMemoryPtr: when non-NULL it is first set to false, and set to true only on the
      * UTF-16 path when no byte swap is needed (and, for kCFStringEncodingUTF16, no BOM was
      * found); buffer->chars.unicode then points into `bytes` instead of at a copy.
      *
      * converterFlags: passed to the UTF-8 converter only.
      *
      * Returns true on success (including len == 0). On a conversion or allocation failure the
      * exit path at the bottom frees the output when buffer->shouldFreeChars is set, resets
      * buffer and returns false. Two early exits bypass that path: out-of-range strict UTF-32
      * input and a missing converter both return false directly.
      *
      * The UTF-16/UTF-32 BOM probes only read the first code unit when the input actually
      * contains one, so inputs shorter than a code unit are not read past their end.
      */
 129     CFIndex idx;
 130     const uint8_t *chars = (const uint8_t *)bytes;
 131     const uint8_t *end = chars + len;
 132     Boolean result = TRUE;
 133
 134     if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
 135
 136     buffer->isASCII = !alwaysUnicode;
 137     buffer->shouldFreeChars = false;
 138     buffer->numChars = 0;
 139
 140     if (0 == len) return true;
 141
 142     buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
 143
 144     if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
 145         const UTF16Char *src = (const UTF16Char *)bytes;
 146         const UTF16Char *limit = src + (len / sizeof(UTF16Char)); // <rdar://problem/7854378> avoiding odd len issue
 147         bool swap = false;
 148
 149         if (kCFStringEncodingUTF16 == encoding) {
 150             UTF16Char bom = (((src < limit) && ((*src == 0xFFFE) || (*src == 0xFEFF))) ? *(src++) : 0); // only peek at the BOM when a whole code unit is present
 151
 152 #if __CF_BIG_ENDIAN__
 153             if (bom == 0xFFFE) swap = true;
 154 #else
 155             if (bom != 0xFEFF) swap = true;
 156 #endif
 157             if (bom) useClientsMemoryPtr = NULL; // a consumed BOM rules out handing the client's memory back as is
 158         } else {
 159 #if __CF_BIG_ENDIAN__
 160             if (kCFStringEncodingUTF16LE == encoding) swap = true;
 161 #else
 162             if (kCFStringEncodingUTF16BE == encoding) swap = true;
 163 #endif
 164         }
 165
 166         buffer->numChars = limit - src;
 167
 168         if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
 169             *useClientsMemoryPtr = true;
 170             buffer->chars.unicode = (UniChar *)src;
 171             buffer->isASCII = false;
 172         } else {
 173             if (buffer->isASCII) {      // Let's see if we can reduce the Unicode down to ASCII...
 174                 const UTF16Char *characters = src;
 175                 UTF16Char mask = (swap ? 0x80FF : 0xFF80); // the mask is byte-swapped too, so unswapped input can be tested directly
 176
 177                 while (characters < limit) {
 178                     if (*(characters++) & mask) {
 179                         buffer->isASCII = false;
 180                         break;
 181                     }
 182                 }
 183             }
 184
 185             if (buffer->isASCII) {
 186                 uint8_t *dst;
 187                 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
 188                     if (buffer->numChars > MAX_LOCAL_CHARS) {
 189                         buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
 190                         if (!buffer->chars.ascii) goto memoryErrorExit;
 191                         buffer->shouldFreeChars = true;
 192                     } else {
 193                         buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
 194                     }
 195                 }
 196                 dst = buffer->chars.ascii;
 197
 198                 if (swap) {
 199                     while (src < limit) *(dst++) = (*(src++) >> 8);
 200                 } else {
 201                     while (src < limit) *(dst++) = (uint8_t)*(src++);
 202                 }
 203             } else {
 204                 UTF16Char *dst;
 205
 206                 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
 207                     if (buffer->numChars > MAX_LOCAL_UNICHARS) {
 208                         buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
 209                         if (!buffer->chars.unicode) goto memoryErrorExit;
 210                         buffer->shouldFreeChars = true;
 211                     } else {
 212                         buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
 213                     }
 214                 }
 215                 dst = buffer->chars.unicode;
 216
 217                 if (swap) {
 218                     while (src < limit) *(dst++) = CFSwapInt16(*(src++));
 219                 } else {
 220                     memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
 221                 }
 222             }
 223         }
 224     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
 225         const UTF32Char *src = (const UTF32Char *)bytes;
 226         const UTF32Char *limit =  src + (len / sizeof(UTF32Char)); // <rdar://problem/7854378> avoiding odd len issue
 227         bool swap = false;
 228         static bool strictUTF32 = (bool)-1;
 229
 230         if ((bool)-1 == strictUTF32) strictUTF32 = (1 != 0);
 231
 232         if (kCFStringEncodingUTF32 == encoding) {
 233             UTF32Char bom = (((src < limit) && ((*src == 0xFFFE0000) || (*src == 0x0000FEFF))) ? *(src++) : 0); // only peek at the BOM when a whole code unit is present
 234
 235 #if __CF_BIG_ENDIAN__
 236             if (bom == 0xFFFE0000) swap = true;
 237 #else
 238             if (bom != 0x0000FEFF) swap = true;
 239 #endif
 240         } else {
 241 #if __CF_BIG_ENDIAN__
 242             if (kCFStringEncodingUTF32LE == encoding) swap = true;
 243 #else
 244             if (kCFStringEncodingUTF32BE == encoding) swap = true;
 245 #endif
 246         }
 247
 248         buffer->numChars = limit - src;
 249
 250         {
 251             // Let's see if we have non-ASCII or non-BMP
 252             const UTF32Char *characters = src;
 253             UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
 254             UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
 255
 256             while (characters < limit) {
 257                 if (*characters & asciiMask) {
 258                     buffer->isASCII = false;
 259                     if (*characters & bmpMask) {
 260                         if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scalar Value. Haven't allocated buffer, yet.
 261                         ++(buffer->numChars); // a non-BMP character needs one extra UTF-16 unit in the output
 262                     }
 263                 }
 264                 ++characters;
 265             }
 266         }
 267
 268         if (buffer->isASCII) {
 269             uint8_t *dst;
 270             if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
 271                 if (buffer->numChars > MAX_LOCAL_CHARS) {
 272                     buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
 273                     if (!buffer->chars.ascii) goto memoryErrorExit;
 274                     buffer->shouldFreeChars = true;
 275                 } else {
 276                     buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
 277                 }
 278             }
 279             dst = buffer->chars.ascii;
 280
 281             if (swap) {
 282                 while (src < limit) *(dst++) = (*(src++) >> 24);
 283             } else {
 284                 while (src < limit) *(dst++) = *(src++);
 285             }
 286         } else {
 287             if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
 288                 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
 289                     buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
 290                     if (!buffer->chars.unicode) goto memoryErrorExit;
 291                     buffer->shouldFreeChars = true;
 292                 } else {
 293                     buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
 294                 }
 295             }
 296             result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
 297         }
 298     } else if (kCFStringEncodingUTF8 == encoding) { // UTF-8
 299         if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) {     // If UTF8 BOM, skip
 300             chars += 3;
 301             len -= 3;
 302             if (0 == len) return true;
 303         }
 304         if (buffer->isASCII) {
 305             for (idx = 0; idx < len; idx++) {
 306                 if (128 <= chars[idx]) {
 307                     buffer->isASCII = false;
 308                     break;
 309                 }
 310             }
 311         }
 312         if (buffer->isASCII) {
 313             buffer->numChars = len;
 314             buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true; // NOTE(review): this is also true when the caller supplied buffer->chars - confirm that is intended (same pattern below)
 315             buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 316             if (!buffer->chars.ascii) goto memoryErrorExit;
 317             memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 318         } else {
 319             CFIndex numDone;
 320             static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
 321
 322             if (!__CFFromUTF8) {
 323                 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 324                 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
 325             }
 326
 327             buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 328             buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 329             if (!buffer->chars.unicode) goto memoryErrorExit;
 330             buffer->numChars = 0;
 331             while (chars < end) {
 332                 numDone = 0;
 333                 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
 334
 335                 if (0 == numDone) { // the converter produced nothing: treat as a decoding failure
 336                     result = FALSE;
 337                     break;
 338                 }
 339                 buffer->numChars += numDone;
 340             }
 341         }
 342     } else if (kCFStringEncodingNonLossyASCII == encoding) { // 7-bit ASCII with backslash escapes (\\, \nnn, \uXXXX)
 343         UTF16Char currentValue = 0;
 344         uint8_t character;
 345         int8_t mode = __NSNonLossyASCIIMode;
 346
 347         buffer->isASCII = false;
 348         buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 349         buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 350         if (!buffer->chars.unicode) goto memoryErrorExit;
 351         buffer->numChars = 0;
 352
 353         while (chars < end) {
 354             character = (*chars++);
 355
 356             switch (mode) {
 357                 case __NSNonLossyASCIIMode:
 358                     if (character == '\\') {
 359                         mode = __NSNonLossyBackslashMode;
 360                     } else if (character < 0x80) {
 361                         currentValue = character;
 362                     } else {
 363                         mode = __NSNonLossyErrorMode;
 364                     }
 365                     break;
 366
 367                     case __NSNonLossyBackslashMode:
 368                     if ((character == 'U') || (character == 'u')) {
 369                         mode = __NSNonLossyHexInitialMode;
 370                         currentValue = 0;
 371                     } else if ((character >= '0') && (character <= '9')) {
 372                         mode = __NSNonLossyOctalInitialMode;
 373                         currentValue = character - '0';
 374                     } else if (character == '\\') {
 375                         mode = __NSNonLossyASCIIMode;
 376                         currentValue = character;
 377                     } else {
 378                         mode = __NSNonLossyErrorMode;
 379                     }
 380                     break;
 381
 382                     default: // inside a hex (modes below HexFinal) or octal escape
 383                     if (mode < __NSNonLossyHexFinalMode) {
 384                         if ((character >= '0') && (character <= '9')) {
 385                             currentValue = (currentValue << 4) | (character - '0');
 386                             if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 387                         } else {
 388                             if (character >= 'a') character -= ('a' - 'A');
 389                             if ((character >= 'A') && (character <= 'F')) {
 390                                 currentValue = (currentValue << 4) | ((character - 'A') + 10);
 391                                 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
 392                             } else {
 393                                 mode = __NSNonLossyErrorMode;
 394                             }
 395                         }
 396                     } else {
 397                         if ((character >= '0') && (character <= '9')) {
 398                             currentValue = (currentValue << 3) | (character - '0');
 399                             if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
 400                         } else {
 401                             mode = __NSNonLossyErrorMode;
 402                         }
 403                     }
 404                     break;
 405             }
 406
 407             if (mode == __NSNonLossyASCIIMode) { // back in ASCII mode means a character has just been completed
 408                 buffer->chars.unicode[buffer->numChars++] = currentValue;
 409             } else if (mode == __NSNonLossyErrorMode) {
 410                 break;
 411             }
 412         }
 413         result = ((mode == __NSNonLossyASCIIMode) ? YES : NO); // input that ends inside an escape is an error
 414     } else { // every other encoding goes through the converter returned for it
 415         const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
 416
 417         if (!converter) return false;
 418
 419         Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
 420
 421         if (!isASCIISuperset) buffer->isASCII = false;
 422
 423         if (buffer->isASCII) {
 424             for (idx = 0; idx < len; idx++) {
 425                 if (128 <= chars[idx]) {
 426                     buffer->isASCII = false;
 427                     break;
 428                 }
 429             }
 430         }
 431
 432         if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
 433             if (buffer->isASCII) {
 434                 buffer->numChars = len;
 435                 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 436                 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 437                 if (!buffer->chars.ascii) goto memoryErrorExit;
 438                 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 439             } else {
 440                 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
 441                 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
 442                 if (!buffer->chars.unicode) goto memoryErrorExit;
 443                 buffer->numChars = len;
 444                 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
 445                     for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
 446                 } else {
 447                     for (idx = 0; idx < len; idx++) {
 448                         if (chars[idx] < 0x80 && isASCIISuperset) {
 449                             buffer->chars.unicode[idx] = (UniChar)chars[idx];
 450                         } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
 451                             result = FALSE;
 452                             break;
 453                         }
 454                     }
 455                 }
 456             }
 457         } else {
 458             if (buffer->isASCII) {
 459                 buffer->numChars = len;
 460                 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
 461                 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
 462                 if (!buffer->chars.ascii) goto memoryErrorExit;
 463                 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
 464             } else {
 465                 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
 466                 static UInt32 lossyFlag = (UInt32)-1;
 467
 468                 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
 469                 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
 470                 if (!buffer->chars.unicode) goto memoryErrorExit;
 471
 472                 if (lossyFlag == (UInt32)-1) lossyFlag = 0;
 473
 474                 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE; // a non-zero status is treated as failure
 475             }
 476         }
 477     }
 478
 479     if (FALSE == result) {
 480 memoryErrorExit:        // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
 481         result = FALSE; // In case we come here from a goto
 482         if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
 483         buffer->isASCII = !alwaysUnicode;
 484         buffer->shouldFreeChars = false;
 485         buffer->chars.ascii = NULL;
 486         buffer->numChars = 0;
 487     }
 488     return result;
 489 }
 490
 491
 492 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
 493    into a fixed size buffer. Returns number of characters converted.
 494    Characters that cannot be converted to the specified encoding are represented
 495    with the char specified by lossByte; if 0, then lossy conversion is not allowed
 496    and conversion stops, returning partial results.
 497    Pass buffer==NULL if you don't care about the converted string (but just the convertibility,
 498    or number of bytes required, indicated by usedBufLen).
 499    Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
 500
 501    Note: This function is intended to work through CFString functions, so it should work
 502    with NSStrings as well as CFStrings.
 503 */
 504 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
 505     CFIndex totalBytesWritten = 0;      /* Number of written bytes */
 506     CFIndex numCharsProcessed = 0;      /* Number of processed chars */
 507     const UniChar *unichars;
 508
 509     if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
 510         static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
 511
 512         if (!__CFToUTF8) {
 513             const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
 514             __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
 515         }
 516         numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
 517
 518     } else if (encoding == kCFStringEncodingNonLossyASCII) {
 519         const char *hex = "0123456789abcdef";
 520         UniChar ch;
 521         CFStringInlineBuffer buf;
 522         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
 523         while (numCharsProcessed < rangeLen) {
 524             CFIndex reqLength; /* Required number of chars to encode this UniChar */
 525             CFIndex cnt;
 526             char tmp[6];
 527             ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
 528             if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
 529                 reqLength = 1;
 530                 tmp[0] = (char)ch;
 531             } else {
 532                 if (ch == '\\') {
 533                     tmp[1] = '\\';
 534                     reqLength = 2;
 535                 } else if (ch < 256) {  /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
 536                     tmp[1] = '0' + (ch >> 6);
 537                     tmp[2] = '0' + ((ch >> 3) & 7);
 538                     tmp[3] = '0' + (ch & 7);
 539                     reqLength = 4;
 540                 } else {        /* \Unnnn */
 541                     tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
 542                     tmp[2] = hex[(ch >> 12) & 0x0f];
 543                     tmp[3] = hex[(ch >> 8) & 0x0f];
 544                     tmp[4] = hex[(ch >> 4) & 0x0f];
 545                     tmp[5] = hex[ch & 0x0f];
 546                     reqLength = 6;
 547                 }
 548                 tmp[0] = '\\';
 549             }
 550             if (buffer) {
 551                 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
 552 .*/
 553                 for (cnt = 0; cnt < reqLength; cnt++) {
 554                     buffer[totalBytesWritten + cnt] = tmp[cnt];
 555                 }
 556             }
 557             totalBytesWritten += reqLength;
 558             numCharsProcessed++;
 559         }
 560     } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
 561         CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
 562         numCharsProcessed = rangeLen;
 563         if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
 564             numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
 565         }
 566         totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
 567         if (buffer) {
 568             if (extraForBOM) {  /* Generate BOM */
 569 #if __CF_BIG_ENDIAN__
 570                 *buffer++ = 0xfe; *buffer++ = 0xff;
 571 #else
 572                 *buffer++ = 0xff; *buffer++ = 0xfe;
 573 #endif
 574             }
 575             CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
 576             if ((__CF_BIG_ENDIAN__ ?  kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
 577                 UTF16Char *characters = (UTF16Char *)buffer;
 578                 const UTF16Char *limit = characters + numCharsProcessed;
 579
 580                 while (characters < limit) {
 581                     *characters = CFSwapInt16(*characters);
 582                     ++characters;
 583                 }
 584             }
 585         }
 586     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
 587         UTF32Char character;
 588         CFStringInlineBuffer buf;
 589         UTF32Char *characters = (UTF32Char *)buffer;
 590
 591         bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
 592         if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
 593             totalBytesWritten += sizeof(UTF32Char);
 594             if (characters) {
 595                 if (totalBytesWritten > max) { // insufficient buffer
 596                     totalBytesWritten = 0;
 597                 } else {
 598                     *(characters++) = 0x0000FEFF;
 599                 }
 600             }
 601         }
 602
 603         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
 604         while (numCharsProcessed < rangeLen) {
 605             character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
 606
 607             if (CFUniCharIsSurrogateHighCharacter(character)) {
 608                 UTF16Char otherCharacter;
 609
 610                 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
 611                     character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
 612                 } else if (lossByte) {
 613                     character = lossByte;
 614                 } else {
 615                     break;
 616                 }
 617             } else if (CFUniCharIsSurrogateLowCharacter(character)) {
 618                 if (lossByte) {
 619                     character = lossByte;
 620                 } else {
 621                     break;
 622                 }
 623             }
 624
 625             totalBytesWritten += sizeof(UTF32Char);
 626
 627             if (characters) {
 628                 if (totalBytesWritten > max) {
 629                     totalBytesWritten -= sizeof(UTF32Char);
 630                     break;
 631                 }
 632                 *(characters++) = (swap ? CFSwapInt32(character) : character);
 633             }
 634
 635             numCharsProcessed += (character > 0xFFFF ? 2 : 1);
 636         }
 637     } else {
 638         CFIndex numChars;
 639         UInt32 flags;
 640         const unsigned char *cString = NULL;
 641         Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
 642
 643         if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
 644
 645         if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
 646             const unsigned char *ptr;
 647             if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 648                 ptr = (cString += rangeLoc);
 649                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 650                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 651                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 652                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 653                     return numCharsProcessed;
 654                 }
 655
 656                 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
 657                 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
 658                     ++ptr;
 659                     --rangeLen;
 660                 }
 661                 numCharsProcessed = ptr - cString;
 662                 if (buffer) {
 663                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 664                     memmove(buffer, cString, numCharsProcessed);
 665                     buffer += numCharsProcessed;
 666                     max -= numCharsProcessed;
 667                 }
 668                 if (!rangeLen || (buffer && (max == 0))) {
 669                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 670                     return numCharsProcessed;
 671                 }
 672                 rangeLoc += numCharsProcessed;
 673                 totalBytesWritten += numCharsProcessed;
 674             }
 675             if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
 676                 ptr = (cString += (rangeLoc + 1));
 677                 if (__CFStringGetEightBitStringEncoding() == encoding) {
 678                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
 679                     if (buffer) memmove(buffer, cString, numCharsProcessed);
 680                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 681                     return numCharsProcessed;
 682                 }
 683                 while (*ptr < 0x80 && rangeLen > 0) {
 684                     ++ptr;
 685                     --rangeLen;
 686                 }
 687                 numCharsProcessed = ptr - cString;
 688                 if (buffer) {
 689                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
 690                     memmove(buffer, cString, numCharsProcessed);
 691                     buffer += numCharsProcessed;
 692                     max -= numCharsProcessed;
 693                 }
 694                 if (!rangeLen || (buffer && (max == 0))) {
 695                     if (usedBufLen) *usedBufLen = numCharsProcessed;
 696                     return numCharsProcessed;
 697                 }
 698                 rangeLoc += numCharsProcessed;
 699                 totalBytesWritten += numCharsProcessed;
 700             }
 701         }
 702
 703         if (!buffer) max = 0;
 704
 705         // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
 706         // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
 707         flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
 708
 709         if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
 710             CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
 711         } else {
 712             UniChar charBuf[kCFCharConversionBufferLength];
 713             CFIndex currentLength;
 714             CFIndex usedLen;
 715             CFIndex lastUsedLen = 0, lastNumChars = 0;
 716             uint32_t result;
 717             uint32_t streamingMask;
 718             uint32_t streamID = 0;
 719 #define MAX_DECOMP_LEN (6)
 720
 721             while (rangeLen > 0) {
 722                 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
 723                 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
 724
 725                 // could be in the middle of surrogate pair; back up.
 726                 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
 727
 728                 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
 729
 730                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
 731                 streamID = CFStringEncodingStreamIDFromMask(result);
 732                 result &= ~CFStringEncodingStreamIDMask;
 733
 734                 if (result != kCFStringEncodingConversionSuccess) {
 735                     if (kCFStringEncodingInvalidInputStream == result) {
 736                         CFRange composedRange;
 737                         // Check the tail
 738                         if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
 739                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
 740
 741                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
 742                                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
 743                                 streamID = CFStringEncodingStreamIDFromMask(result);
 744                                 result &= ~CFStringEncodingStreamIDMask;
 745                             }
 746                         }
 747
 748                         // Check the head
 749                         if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
 750                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
 751
 752                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
 753                                 // Try if the composed range can be converted
 754                                 CFStringGetCharacters(string, composedRange, charBuf);
 755
 756                                 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 757                                     CFIndex lastRangeLoc = rangeLoc - lastNumChars;
 758
 759                                     currentLength = composedRange.location - lastRangeLoc;
 760                                     CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
 761
 762                                     result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
 763                                     streamID = CFStringEncodingStreamIDFromMask(result);
 764                                     result &= ~CFStringEncodingStreamIDMask;
 765
 766                                     if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
 767                                         // Looks good. back up
 768                                         totalBytesWritten -= lastUsedLen;
 769                                         numCharsProcessed -= lastNumChars;
 770
 771                                         rangeLoc = lastRangeLoc;
 772                                         rangeLen += lastNumChars;
 773
 774                                         if (max) {
 775                                             buffer -= lastUsedLen;
 776                                             max += lastUsedLen;
 777                                         }
 778                                     }
 779                                 }
 780                             }
 781                         }
 782                     }
 783
 784                     if (kCFStringEncodingConversionSuccess != result) { // really failed
 785                         totalBytesWritten += usedLen;
 786                         numCharsProcessed += numChars;
 787                         break;
 788                     }
 789                 }
 790
 791                 totalBytesWritten += usedLen;
 792                 numCharsProcessed += numChars;
 793
 794                 rangeLoc += numChars;
 795                 rangeLen -= numChars;
 796                 if (max) {
 797                     buffer += usedLen;
 798                     max -= usedLen;
 799                     if (max <= 0) break;
 800                 }
 801                 lastUsedLen = usedLen; lastNumChars = numChars;
 802                 flags &= ~kCFStringEncodingPrependBOM;
 803             }
 804         }
 805     }
 806     if (usedBufLen) *usedBufLen = totalBytesWritten;
 807     return numCharsProcessed;
 808 }
 809
 810 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
 811     return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
 812 }
 813
 814 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
 815     CFIndex len = CFStringGetLength(string);
 816     CFStringEncoding enc = CFStringGetFastestEncoding(string);
 817     switch (enc) {
 818         case kCFStringEncodingASCII:
 819         case kCFStringEncodingMacRoman:
 820             if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound;     // Avoid wrap-around
 821             return len * 3L + 1L;
 822         default:
 823             if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound;     // Avoid wrap-around
 824             return len * 9L + 1L;
 825     }
 826 }
 827
 828 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
 829 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
 830 #define MAX_STACK_BUFFER_LEN    (255)
 831     const UTF16Char *characters = CFStringGetCharactersPtr(string);
 832     const char *origBuffer = buffer;
 833     const char *bufferLimit = buffer + maxBufLen;
 834     CFIndex length = CFStringGetLength(string);
 835     CFIndex usedBufLen;
 836
 837     if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
 838
 839     if (NULL == characters) {
 840         UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
 841         CFRange range = CFRangeMake(0, 0);
 842         const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
 843
 844         if (NULL != bytes) {
 845             const char *originalBytes = bytes;
 846             const char *bytesLimit = bytes + length;
 847
 848             while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
 849
 850             range.location = bytes - originalBytes;
 851         }
 852         while ((range.location < length) && (buffer < bufferLimit)) {
 853             range.length = length - range.location;
 854             if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
 855
 856             CFStringGetCharacters(string, range, charactersBuffer);
 857             if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
 858
 859             if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 860
 861             buffer += usedBufLen;
 862             range.location += range.length;
 863         }
 864     } else {
 865         if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
 866         buffer += usedBufLen;
 867     }
 868
 869     if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
 870         *buffer = '\0';
 871         if (_CFExecutableLinkedOnOrAfter(CFSystemVersionLion)) {
 872             while (origBuffer < buffer) if (*origBuffer++ == 0) {       // There's a zero in there. Now see if the rest are all zeroes.
 873                 while (origBuffer < buffer) if (*origBuffer++ != 0) return false;       // Embedded NULLs should cause failure: <rdar://problem/5863219>
 874             }
 875         }
 876         return true;
 877     } else {
 878         return false;
 879     }
 880 #else
 881     return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
 882 #endif
 883 }
 884
 885 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
 886     return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
 887 }
 888
 889
 890 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
 891
 892 /* This function is used to obtain users' default script/region code.
 893    The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
 894 */
 895 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
 896     char *stringValue;
 897     char buffer[__kCFMaxDefaultEncodingFileLength];
 898     int uid = getuid();
 899
 900     if ((stringValue = (char *)__CFgetenv(__kCFUserEncodingEnvVariableName)) != NULL) {
 901         if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
 902             ++stringValue;
 903         } else {
 904             stringValue = NULL;
 905         }
 906     }
 907
 908     if ((stringValue == NULL) && ((uid > 0) || __CFgetenv("HOME"))) {
 909         char passwdExtraBuf[1000 + MAXPATHLEN];  // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
 910         struct passwd passwdBuf, *passwdp = NULL;
 911
 912         switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
 913             case 0:         // Success
 914                 break;
 915             case ERANGE:    // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
 916                 passwdp = getpwuid((uid_t)uid);
 917                 break;
 918             default:
 919                 passwdp = NULL;
 920         }
 921         if (passwdp) {
 922             char filename[MAXPATHLEN + 1];
 923
 924             const char *path = NULL;
 925             if (!issetugid()) {
 926                 path = __CFgetenv("CFFIXED_USER_HOME");
 927             }
 928             if (!path) {
 929                 path = passwdp->pw_dir;
 930             }
 931
 932             strlcpy(filename, path, sizeof(filename));
 933             strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
 934
 935             int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
 936             int fd = open(filename, O_RDONLY, 0);
 937             if (fd == -1) {
 938                 // Cannot open the file. Let's fallback to smRoman/verUS
 939                 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
 940                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
 941             } else {
 942                 ssize_t readSize;
 943                 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
 944                 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
 945                 close(fd);
 946                 stringValue = buffer;
 947
 948                 // Well, we already have a buffer, let's reuse it
 949                 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
 950                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
 951             }
 952             if (-1 != no_hang_fd) close(no_hang_fd);
 953         }
 954     }
 955
 956     if (stringValue) {
 957         *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
 958         if (*stringValue == ':') {
 959             if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
 960             return;
 961         }
 962     }
 963
 964     // Falling back
 965     *oScriptValue = 0; // smRoman
 966     if (oRegionValue) *oRegionValue = 0; // verUS
 967 }
 968
 969 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
 970     char buffer[__kCFMaxDefaultEncodingFileLength];
 971     char *stringValue = NULL;
 972
 973     *encoding = 0;
 974     *region = 0;
 975
 976     struct passwd *passwdp = getpwuid((uid_t)0);
 977     if (passwdp) {
 978         const char *path = passwdp->pw_dir;
 979
 980         char filename[MAXPATHLEN + 1];
 981         strlcpy(filename, path, sizeof(filename));
 982         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
 983
 984         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
 985         int fd = open(filename, O_RDONLY, 0);
 986         if (0 <= fd) {
 987             ssize_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
 988             buffer[(size < 0 ? 0 : size)] = '\0';
 989             close(fd);
 990             stringValue = buffer;
 991         }
 992         if (-1 != no_hang_fd) close(no_hang_fd);
 993     }
 994
 995     if (stringValue) {
 996         *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
 997         if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
 998     }
 999 }
1000
1001 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1002     Boolean success = false;
1003     struct passwd *passwdp = getpwuid(getuid());
1004     if (passwdp) {
1005         const char *path = passwdp->pw_dir;
1006         if (!issetugid()) {
1007             const char *value = __CFgetenv("CFFIXED_USER_HOME");
1008             if (value) path = value; // override
1009         }
1010
1011         char filename[MAXPATHLEN + 1];
1012         strlcpy(filename, path, sizeof(filename));
1013         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1014
1015         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1016         (void)unlink(filename);
1017         int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1018         if (0 <= fd) {
1019             char buffer[__kCFMaxDefaultEncodingFileLength];
1020             size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1021             if (size <= __kCFMaxDefaultEncodingFileLength) {
1022                 int ret = write(fd, buffer, size);
1023                 if (size <= ret) success = true;
1024             }
1025             int save_err = errno;
1026             close(fd);
1027             errno = save_err;
1028         }
1029         int save_err = errno;
1030         if (-1 != no_hang_fd) close(no_hang_fd);
1031         errno = save_err;
1032     }
1033     return success;
1034 }
1035
1036 #endif
1037