/* Extracted from: git.saurik.com Git - apple/cf.git/blob */
/*
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*	CFStringEncodings.c
	Responsibility: Aki Inoue
*/

29 #include "CFInternal.h"

30 #include <CoreFoundation/CFString.h>

31 #include <CoreFoundation/CFByteOrder.h>

32 #include <CoreFoundation/CFPriv.h>

33 #include <string.h>

34 #include <CoreFoundation/CFStringEncodingConverterExt.h>

35 #include <CoreFoundation/CFUniChar.h>

36 #include <CoreFoundation/CFUnicodeDecomposition.h>

37 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)

38 #include <stdlib.h>

39 #include <fcntl.h>

40 #include <pwd.h>

41 #include <sys/param.h>

42 #include <unistd.h>

43 #include <string.h>

44 #include <stdio.h>

45 #include <xlocale.h>

46 #include <CoreFoundation/CFStringDefaultEncoding.h>

47 #endif

49 static bool __CFWantsToUseASCIICompatibleConversion = false;

 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) { return __CFWantsToUseASCIICompatibleConversion; }

52 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {

     __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);

54 }

 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;

58 // To avoid early initialization issues, we just initialize this here

59 // This should not be const as it is changed

60 CF_PRIVATE UniChar __CFCharToUniCharTable[256] = {

   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,

  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,

  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,

  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,

  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,

  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,

  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,

 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,

 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,

 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,

 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,

 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,

 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,

 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,

 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,

 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255

77 };

 /* Installs (or, with NULL, removes) the 8-bit -> UniChar callback and brings the
    upper half (128..255) of __CFCharToUniCharTable in line with it.
    Does nothing when func is already the installed callback. */
CF_PRIVATE void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
    int code;

    if (__CFCharToUniCharFunc == func) return; // same callback as before; table is already current

    __CFCharToUniCharFunc = func;

    if (!func) {
        // No callback: bytes 128..255 map to the same numeric value
        for (code = 128; code < 256; code++) __CFCharToUniCharTable[code] = code;
        return;
    }

    for (code = 128; code < 256; code++) {
        UniChar converted;

        if (__CFCharToUniCharFunc(0, code, &converted)) {
            __CFCharToUniCharTable[code] = converted;
        } else {
            __CFCharToUniCharTable[code] = 0xFFFD; // callback reported failure for this byte
        }
    }
}

 /* Expands numChars bytes into UniChars by looking each byte up in
    __CFCharToUniCharTable.  buffer must have room for numChars UniChars. */
CF_PRIVATE void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
    const uint8_t *in = bytes;
    UniChar *out = buffer;
    CFIndex remaining = numChars;

    while (remaining > 0) {
        *out++ = __CFCharToUniCharTable[*in++];
        --remaining;
    }
}

/* Minimum length that the output buffers of the conversion functions above should have.
   Also used in this file as the element count of the on-stack UniChar scratch buffer
   for chunked conversion. */
#define kCFCharConversionBufferLength 512

/* Capacity of the inline storage (localBuffer) of a CFVarWidthCharBuffer, counted in
   8-bit characters and in UniChars respectively.  Both macros expand to an expression
   on a variable literally named `buffer`, so they are only usable where such a
   pointer is in scope. */
#define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))

107

108 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.

109 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL

110 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true

111 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated

112 !!! converterFlags is only used for the UTF8 converter at this point

113 */

 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {

     return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);

116 }

117

/* States of the kCFStringEncodingNonLossyASCII decoder in __CFStringDecodeByteStream3().
   While reading an escape, the state is incremented once per digit consumed until it
   reaches the matching "final" value. */
enum {
    __NSNonLossyErrorMode = -1,                                         // malformed input; decoding stops
    __NSNonLossyASCIIMode = 0,                                          // outside any escape sequence
    __NSNonLossyBackslashMode = 1,                                      // just consumed a backslash
    __NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,         // after \u or \U, no hex digit read yet
    __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,          // reached after four hex digits
    __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,        // first digit after the backslash already read
    __NSNonLossyOctalFinalMode = __NSNonLossyOctalInitialMode + 2       // reached after two further digits
};

127

 /* Decodes `len` bytes in `encoding` into 7-bit ASCII or UTF-16 characters described by *buffer.

    Parameters:
      bytes, len           Input byte stream.  A zero len succeeds immediately with numChars == 0.
      encoding             Source encoding.  UTF-16, UTF-32, UTF-8 and NonLossyASCII are handled
                           inline; everything else goes through the registered converter.
      alwaysUnicode        When true the result is never narrowed to ASCII (isASCII starts false).
      buffer               In/out descriptor.  A NULL allocator is replaced by the default allocator.
                           If buffer->chars is NULL, storage is taken from buffer->localBuffer when the
                           result fits, otherwise allocated with buffer->allocator.
      useClientsMemoryPtr  Optional.  Cleared on entry; set to true only when UTF-16 input can be used
                           in place (no BOM, no byte swap), in which case buffer->chars.unicode points
                           into `bytes`.
      converterFlags       Passed to the UTF-8 converter only.

    Returns TRUE on success.  On a failure that reaches the common error exit, any storage this call
    marked with shouldFreeChars is released and *buffer is reset (chars NULL, numChars 0).  The two
    early `return false` paths (strict UTF-32 range check, unknown converter) return before any
    storage has been assigned and do not reset *buffer.
 */
Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
    CFIndex idx;
    const uint8_t *chars = (const uint8_t *)bytes;
    const uint8_t *end = chars + len;
    Boolean result = TRUE;

    if (useClientsMemoryPtr) *useClientsMemoryPtr = false;

    buffer->isASCII = !alwaysUnicode;
    buffer->shouldFreeChars = false;
    buffer->numChars = 0;

    if (0 == len) return true;

    buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());

    if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
        const UTF16Char *src = (const UTF16Char *)bytes;
        const UTF16Char *limit = src + (len / sizeof(UTF16Char)); // <rdar://problem/7854378> avoiding odd len issue
        bool swap = false;

        if (kCFStringEncodingUTF16 == encoding) {
            // Probe for a BOM only when a whole code unit is present.  With len == 1, limit == src and
            // reading *src would run past the input (and could push src beyond limit, making numChars negative).
            UTF16Char bom = (((src < limit) && ((*src == 0xFFFE) || (*src == 0xFEFF))) ? *(src++) : 0);

#if __CF_BIG_ENDIAN__
            if (bom == 0xFFFE) swap = true;
#else
            if (bom != 0xFEFF) swap = true; // no BOM, or a byte-swapped BOM: input is not in host order
#endif
            if (bom) useClientsMemoryPtr = NULL; // a BOM was consumed, so the client's memory cannot be used as is
        } else {
#if __CF_BIG_ENDIAN__
            if (kCFStringEncodingUTF16LE == encoding) swap = true;
#else
            if (kCFStringEncodingUTF16BE == encoding) swap = true;
#endif
        }

        buffer->numChars = limit - src;

        if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
            *useClientsMemoryPtr = true;
            buffer->chars.unicode = (UniChar *)src;
            buffer->isASCII = false;
        } else {
            if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
                const UTF16Char *characters = src;
                UTF16Char mask = (swap ? 0x80FF : 0xFF80); // bits that must be clear for a 7-bit value, in the stored byte order

                while (characters < limit) {
                    if (*(characters++) & mask) {
                        buffer->isASCII = false;
                        break;
                    }
                }
            }

            if (buffer->isASCII) {
                uint8_t *dst;

                if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
                    if (buffer->numChars > MAX_LOCAL_CHARS) {
                        buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
                        if (!buffer->chars.ascii) goto memoryErrorExit;
                        buffer->shouldFreeChars = true;
                    } else {
                        buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
                    }
                }
                dst = buffer->chars.ascii;

                if (swap) {
                    while (src < limit) *(dst++) = (*(src++) >> 8); // the 7-bit value sits in the high byte of a swapped unit
                } else {
                    while (src < limit) *(dst++) = (uint8_t)*(src++);
                }
            } else {
                UTF16Char *dst;

                if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
                    if (buffer->numChars > MAX_LOCAL_UNICHARS) {
                        buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
                        if (!buffer->chars.unicode) goto memoryErrorExit;
                        buffer->shouldFreeChars = true;
                    } else {
                        buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
                    }
                }
                dst = buffer->chars.unicode;

                if (swap) {
                    while (src < limit) *(dst++) = CFSwapInt16(*(src++));
                } else {
                    memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
                }
            }
        }
    } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
        const UTF32Char *src = (const UTF32Char *)bytes;
        const UTF32Char *limit = src + (len / sizeof(UTF32Char)); // <rdar://problem/7854378> avoiding odd len issue
        bool swap = false;
        static bool strictUTF32 = (bool)-1;

        if ((bool)-1 == strictUTF32) strictUTF32 = (1 != 0);

        if (kCFStringEncodingUTF32 == encoding) {
            // Same guard as for UTF-16: with fewer than four bytes there is no code unit to inspect.
            UTF32Char bom = (((src < limit) && ((*src == 0xFFFE0000) || (*src == 0x0000FEFF))) ? *(src++) : 0);

#if __CF_BIG_ENDIAN__
            if (bom == 0xFFFE0000) swap = true;
#else
            if (bom != 0x0000FEFF) swap = true;
#endif
        } else {
#if __CF_BIG_ENDIAN__
            if (kCFStringEncodingUTF32LE == encoding) swap = true;
#else
            if (kCFStringEncodingUTF32BE == encoding) swap = true;
#endif
        }

        buffer->numChars = limit - src;

        {
            // Let's see if we have non-ASCII or non-BMP
            const UTF32Char *characters = src;
            UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
            UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);

            while (characters < limit) {
                if (*characters & asciiMask) {
                    buffer->isASCII = false;
                    if (*characters & bmpMask) {
                        if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scalar Value. Haven't allocated buffer, yet.
                        ++(buffer->numChars); // a non-BMP character needs one extra UTF-16 unit
                    }
                }
                ++characters;
            }
        }

        if (buffer->isASCII) {
            uint8_t *dst;

            if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
                if (buffer->numChars > MAX_LOCAL_CHARS) {
                    buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
                    if (!buffer->chars.ascii) goto memoryErrorExit;
                    buffer->shouldFreeChars = true;
                } else {
                    buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
                }
            }
            dst = buffer->chars.ascii;

            if (swap) {
                while (src < limit) *(dst++) = (*(src++) >> 24);
            } else {
                while (src < limit) *(dst++) = *(src++);
            }
        } else {
            if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
                if (buffer->numChars > MAX_LOCAL_UNICHARS) {
                    buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
                    if (!buffer->chars.unicode) goto memoryErrorExit;
                    buffer->shouldFreeChars = true;
                } else {
                    buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
                }
            }
            result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
        }
    } else if (kCFStringEncodingUTF8 == encoding) {
        if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
            // `end` was computed from the original chars/len, so it still marks the end of the input.
            chars += 3;
            len -= 3;
            if (0 == len) return true;
        }
        if (buffer->isASCII) {
            for (idx = 0; idx < len; idx++) {
                if (128 <= chars[idx]) {
                    buffer->isASCII = false;
                    break;
                }
            }
        }
        if (buffer->isASCII) {
            buffer->numChars = len;
            // Parses as (!chars && fits) ? false : true: false only when the local buffer is about to be used.
            // NOTE(review): this is also true when the caller supplied buffer->chars, unlike the UTF-16/32
            // paths above, which leave shouldFreeChars false in that case - confirm this is intended.
            buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
            buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
            if (!buffer->chars.ascii) goto memoryErrorExit;
            memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
        } else {
            CFIndex numDone;
            static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;

            if (!__CFFromUTF8) { // look the UTF-8 decoder up once and cache it
                const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
                __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
            }

            buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
            buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
            if (!buffer->chars.unicode) goto memoryErrorExit;
            buffer->numChars = 0;
            while (chars < end) {
                numDone = 0;
                chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);

                if (0 == numDone) { // the converter produced nothing: treat as a decoding failure
                    result = FALSE;
                    break;
                }
                buffer->numChars += numDone;
            }
        }
    } else if (kCFStringEncodingNonLossyASCII == encoding) {
        UTF16Char currentValue = 0;
        uint8_t character;
        int8_t mode = __NSNonLossyASCIIMode;

        buffer->isASCII = false;
        buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
        buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
        if (!buffer->chars.unicode) goto memoryErrorExit;
        buffer->numChars = 0;

        while (chars < end) {
            character = (*chars++);

            switch (mode) {
                case __NSNonLossyASCIIMode:
                    if (character == '\\') {
                        mode = __NSNonLossyBackslashMode;
                    } else if (character < 0x80) {
                        currentValue = character;
                    } else {
                        mode = __NSNonLossyErrorMode;
                    }
                    break;

                case __NSNonLossyBackslashMode:
                    if ((character == 'U') || (character == 'u')) {
                        mode = __NSNonLossyHexInitialMode;
                        currentValue = 0;
                    } else if ((character >= '0') && (character <= '9')) {
                        mode = __NSNonLossyOctalInitialMode;
                        currentValue = character - '0';
                    } else if (character == '\\') {
                        mode = __NSNonLossyASCIIMode;
                        currentValue = character;
                    } else {
                        mode = __NSNonLossyErrorMode;
                    }
                    break;

                default:
                    if (mode < __NSNonLossyHexFinalMode) { // inside a \uXXXX escape
                        if ((character >= '0') && (character <= '9')) {
                            currentValue = (currentValue << 4) | (character - '0');
                            if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
                        } else {
                            if (character >= 'a') character -= ('a' - 'A'); // fold lower case onto upper case
                            if ((character >= 'A') && (character <= 'F')) {
                                currentValue = (currentValue << 4) | ((character - 'A') + 10);
                                if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
                            } else {
                                mode = __NSNonLossyErrorMode;
                            }
                        }
                    } else { // inside a \nnn escape
                        if ((character >= '0') && (character <= '9')) {
                            currentValue = (currentValue << 3) | (character - '0');
                            if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
                        } else {
                            mode = __NSNonLossyErrorMode;
                        }
                    }
                    break;
            }

            if (mode == __NSNonLossyASCIIMode) { // a complete character (plain or escaped) is available
                buffer->chars.unicode[buffer->numChars++] = currentValue;
            } else if (mode == __NSNonLossyErrorMode) {
                break;
            }
        }
        // Ending in the middle of an escape sequence is a failure, too.
        result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
    } else {
        const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);

        if (!converter) return false;

        Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);

        if (!isASCIISuperset) buffer->isASCII = false;

        if (buffer->isASCII) {
            for (idx = 0; idx < len; idx++) {
                if (128 <= chars[idx]) {
                    buffer->isASCII = false;
                    break;
                }
            }
        }

        if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
            if (buffer->isASCII) {
                buffer->numChars = len;
                buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
                buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
                if (!buffer->chars.ascii) goto memoryErrorExit;
                memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
            } else {
                buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
                buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
                if (!buffer->chars.unicode) goto memoryErrorExit;
                buffer->numChars = len;
                if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
                    // These two map every byte to the UniChar with the same value
                    for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
                } else {
                    for (idx = 0; idx < len; idx++) {
                        if (chars[idx] < 0x80 && isASCIISuperset) {
                            buffer->chars.unicode[idx] = (UniChar)chars[idx];
                        } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
                            result = FALSE;
                            break;
                        }
                    }
                }
            }
        } else {
            if (buffer->isASCII) {
                buffer->numChars = len;
                buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
                buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
                if (!buffer->chars.ascii) goto memoryErrorExit;
                memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
            } else {
                CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
                static UInt32 lossyFlag = (UInt32)-1;

                buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
                buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
                if (!buffer->chars.unicode) goto memoryErrorExit;

                if (lossyFlag == (UInt32)-1) lossyFlag = 0;

                // A non-zero status from the converter is treated as failure.
                if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
            }
        }
    }

    if (FALSE == result) {
memoryErrorExit: // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
        result = FALSE; // In case we come here from a goto
        if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
        buffer->isASCII = !alwaysUnicode;
        buffer->shouldFreeChars = false;
        buffer->chars.ascii = NULL;
        buffer->numChars = 0;
    }
    return result;
}

490

491

/* Create a byte stream from a CFString backing. Can convert a string piece at a time
   into a fixed size buffer. Returns number of characters converted.
   Characters that cannot be converted to the specified encoding are represented
   with the char specified by lossByte; if 0, then lossy conversion is not allowed
   and conversion stops, returning partial results.
   Pass buffer==NULL if you don't care about the converted string (but just the convertibility,
   or number of bytes required, indicated by usedBufLen).
   Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.

   Note: This function is intended to work through CFString functions, so it should work
   with NSStrings as well as CFStrings.
*/

 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {

505 CFIndex totalBytesWritten = 0; /* Number of written bytes */

506 CFIndex numCharsProcessed = 0; /* Number of processed chars */

507 const UniChar *unichars;

508

     if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {

510 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;

511

512 if (!__CFToUTF8) {

             const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);

514 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;

515 }

         numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);

517

     } else if (encoding == kCFStringEncodingNonLossyASCII) {

         const char *hex = "0123456789abcdef";

520 UniChar ch;

521 CFStringInlineBuffer buf;

         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));

523 while (numCharsProcessed < rangeLen) {

524 CFIndex reqLength; /* Required number of chars to encode this UniChar */

525 CFIndex cnt;

526 char tmp[6];

             ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);

             if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {

529 reqLength = 1;

                 tmp[0] = (char)ch;

531 } else {

                 if (ch == '\\') {

                     tmp[1] = '\\';

534 reqLength = 2;

                 } else if (ch < 256) {  /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */

                     tmp[1] = '0' + (ch >> 6);

                     tmp[2] = '0' + ((ch >> 3) & 7);

                     tmp[3] = '0' + (ch & 7);

539 reqLength = 4;

540 } else { /* \Unnnn */

                     tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java

                     tmp[2] = hex[(ch >> 12) & 0x0f];

                     tmp[3] = hex[(ch >> 8) & 0x0f];

                     tmp[4] = hex[(ch >> 4) & 0x0f];

                     tmp[5] = hex[ch & 0x0f];

546 reqLength = 6;

547 }

                 tmp[0] = '\\';

549 }

550 if (buffer) {

                 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..

552 .*/

                 for (cnt = 0; cnt < reqLength; cnt++) {

                     buffer[totalBytesWritten + cnt] = tmp[cnt];

555 }

556 }

557 totalBytesWritten += reqLength;

558 numCharsProcessed++;

559 }

     } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {

         CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);

562 numCharsProcessed = rangeLen;

         if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {

             numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;

565 }

         totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;

567 if (buffer) {

568 if (extraForBOM) { /* Generate BOM */

569 #if __CF_BIG_ENDIAN__

                 *buffer++ = 0xfe; *buffer++ = 0xff;

571 #else

                 *buffer++ = 0xff; *buffer++ = 0xfe;

573 #endif

574 }

             CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);

             if ((__CF_BIG_ENDIAN__ ?  kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap

577 UTF16Char *characters = (UTF16Char *)buffer;

                 const UTF16Char *limit = characters + numCharsProcessed;

579

580 while (characters < limit) {

                     *characters = CFSwapInt16(*characters);

582 ++characters;

583 }

584 }

585 }

     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {

587 UTF32Char character;

588 CFStringInlineBuffer buf;

589 UTF32Char *characters = (UTF32Char *)buffer;

590

         bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);

         if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {

593 totalBytesWritten += sizeof(UTF32Char);

594 if (characters) {

                 if (totalBytesWritten > max) { // insufficient buffer

596 totalBytesWritten = 0;

597 } else {

598 *(characters++) = 0x0000FEFF;

599 }

600 }

601 }

602

         CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));

604 while (numCharsProcessed < rangeLen) {

             character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);

606

             if (CFUniCharIsSurrogateHighCharacter(character)) {

608 UTF16Char otherCharacter;

609

                 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {

                     character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);

612 } else if (lossByte) {

613 character = lossByte;

614 } else {

615 break;

616 }

             } else if (CFUniCharIsSurrogateLowCharacter(character)) {

618 if (lossByte) {

619 character = lossByte;

620 } else {

621 break;

622 }

623 }

624

625 totalBytesWritten += sizeof(UTF32Char);

626

627 if (characters) {

628 if (totalBytesWritten > max) {

629 totalBytesWritten -= sizeof(UTF32Char);

630 break;

631 }

                 *(characters++) = (swap ? CFSwapInt32(character) : character);

633 }

634

             numCharsProcessed += (character > 0xFFFF ? 2 : 1);

636 }

637 } else {

638 CFIndex numChars;

639 UInt32 flags;

640 const unsigned char *cString = NULL;

641 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);

642

         if (!CFStringEncodingIsValidEncoding(encoding)) return 0;

644

         if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion

646 const unsigned char *ptr;

             if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {

648 ptr = (cString += rangeLoc);

                 if (__CFStringGetEightBitStringEncoding() == encoding) {

                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);

                     if (buffer) memmove(buffer, cString, numCharsProcessed);

                     if (usedBufLen) *usedBufLen = numCharsProcessed;

653 return numCharsProcessed;

654 }

655

                 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;

                 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {

658 ++ptr;

659 --rangeLen;

660 }

661 numCharsProcessed = ptr - cString;

662 if (buffer) {

                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);

                     memmove(buffer, cString, numCharsProcessed);

665 buffer += numCharsProcessed;

666 max -= numCharsProcessed;

667 }

                 if (!rangeLen || (buffer && (max == 0))) {

                     if (usedBufLen) *usedBufLen = numCharsProcessed;

670 return numCharsProcessed;

671 }

672 rangeLoc += numCharsProcessed;

673 totalBytesWritten += numCharsProcessed;

674 }

             if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {

                 ptr = (cString += (rangeLoc + 1));

                 if (__CFStringGetEightBitStringEncoding() == encoding) {

                     numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);

                     if (buffer) memmove(buffer, cString, numCharsProcessed);

                     if (usedBufLen) *usedBufLen = numCharsProcessed;

681 return numCharsProcessed;

682 }

                 while (*ptr < 0x80 && rangeLen > 0) {

684 ++ptr;

685 --rangeLen;

686 }

687 numCharsProcessed = ptr - cString;

688 if (buffer) {

                     numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);

                     memmove(buffer, cString, numCharsProcessed);

691 buffer += numCharsProcessed;

692 max -= numCharsProcessed;

693 }

                 if (!rangeLen || (buffer && (max == 0))) {

                     if (usedBufLen) *usedBufLen = numCharsProcessed;

696 return numCharsProcessed;

697 }

698 rangeLoc += numCharsProcessed;

699 totalBytesWritten += numCharsProcessed;

700 }

701 }

702

         if (!buffer) max = 0;

704

705 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion

706 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).

         flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());

708

         if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string

             CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);

711 } else {

712 UniChar charBuf[kCFCharConversionBufferLength];

713 CFIndex currentLength;

714 CFIndex usedLen;

             CFIndex lastUsedLen = 0, lastNumChars = 0;

716 uint32_t result;

717 uint32_t streamingMask;

718 uint32_t streamID = 0;

719 #define MAX_DECOMP_LEN (6)

720

             while (rangeLen > 0) {

                 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);

                 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);

724

725 // could be in the middle of surrogate pair; back up.

                 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;

727

                 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);

729

                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);

731 streamID = CFStringEncodingStreamIDFromMask(result);

732 result &= ~CFStringEncodingStreamIDMask;

733

734 if (result != kCFStringEncodingConversionSuccess) {

735 if (kCFStringEncodingInvalidInputStream == result) {

736 CFRange composedRange;

737 // Check the tail

                         if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {

                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);

740

                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {

                                 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);

743 streamID = CFStringEncodingStreamIDFromMask(result);

744 result &= ~CFStringEncodingStreamIDMask;

745 }

746 }

747

748 // Check the head

                         if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {

                             composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);

751

                             if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {

753 // Try if the composed range can be converted

                                 CFStringGetCharacters(string, composedRange, charBuf);

755

                                 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run

757 CFIndex lastRangeLoc = rangeLoc - lastNumChars;

758

759 currentLength = composedRange.location - lastRangeLoc;

                                     CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);

761

                                     result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);

763 streamID = CFStringEncodingStreamIDFromMask(result);

764 result &= ~CFStringEncodingStreamIDMask;

765

                                     if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run

767 // Looks good. back up

768 totalBytesWritten -= lastUsedLen;

769 numCharsProcessed -= lastNumChars;

770

771 rangeLoc = lastRangeLoc;

772 rangeLen += lastNumChars;

773

774 if (max) {

775 buffer -= lastUsedLen;

776 max += lastUsedLen;

777 }

778 }

779 }

780 }

781 }

782 }

783

                     if (kCFStringEncodingConversionSuccess != result) { // really failed

785 totalBytesWritten += usedLen;

786 numCharsProcessed += numChars;

787 break;

788 }

789 }

790

791 totalBytesWritten += usedLen;

792 numCharsProcessed += numChars;

793

794 rangeLoc += numChars;

795 rangeLen -= numChars;

796 if (max) {

797 buffer += usedLen;

798 max -= usedLen;

                     if (max <= 0) break;

800 }

801 lastUsedLen = usedLen; lastNumChars = numChars;

802 flags &= ~kCFStringEncodingPrependBOM;

803 }

804 }

805 }

     if (usedBufLen) *usedBufLen = totalBytesWritten;

807 return numCharsProcessed;

808 }

809

 /* Creates a CFString from the C string `buffer`, decoding it with the encoding
    reported by CFStringFileSystemEncoding(). The result is whatever
    CFStringCreateWithCString() returns for those arguments.
 */
 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
     const CFStringEncoding fileSystemEncoding = CFStringFileSystemEncoding();
     return CFStringCreateWithCString(alloc, buffer, fileSystemEncoding);
 }

813

814 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {

815 CFIndex len = CFStringGetLength(string);

816 CFStringEncoding enc = CFStringGetFastestEncoding(string);

817 switch (enc) {

818 case kCFStringEncodingASCII:

819 case kCFStringEncodingMacRoman:

             if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound;     // Avoid wrap-around

             return len * 3L + 1L;

822 default:

             if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound;     // Avoid wrap-around

             return len * 9L + 1L;

825 }

826 }

827

 /* Writes the file system representation of `string` into `buffer`, which can
    hold `maxBufLen` bytes, and NUL-terminates it. Returns true on success.

    On DEPLOYMENT_TARGET_MACOSX / DEPLOYMENT_TARGET_EMBEDDED the output is produced
    by CFUniCharDecompose() with kCFUniCharUTF8Format. The function returns false
    when the buffer is too small (including having no room left for the terminating
    NUL), when CFUniCharDecompose() fails, or -- for executables linked on or after
    CFSystemVersionLion -- when a zero byte in the output is followed by any
    non-zero byte. On other targets it simply defers to CFStringGetCString() with
    CFStringFileSystemEncoding().
 */
 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
 #define MAX_STACK_BUFFER_LEN (255)
     const UTF16Char *directChars = CFStringGetCharactersPtr(string);
     const char *scan = buffer; // walks the finished output during the embedded-NUL check
     const char *const bufferEnd = buffer + maxBufLen;
     const CFIndex charCount = CFStringGetLength(string);
     CFIndex bytesEmitted;

     // Since we're using UTF-8, the byte length is never shorter than the char length.
     // A zero maxBufLen with an empty string passes this test but is rejected by the
     // terminator-space check further down.
     if (maxBufLen < charCount) return false;

     if (NULL != directChars) {
         // The string exposes its UTF-16 storage directly: convert it in one call.
         if (!CFUniCharDecompose(directChars, charCount, NULL, (void *)buffer, maxBufLen, &bytesEmitted, true, kCFUniCharUTF8Format, true)) return false;
         buffer += bytesEmitted;
     } else {
         UTF16Char chunk[MAX_STACK_BUFFER_LEN];
         CFRange pending = CFRangeMake(0, 0);
         const char *eightBit = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());

         if (NULL != eightBit) {
             // Copy the leading run of bytes without the high bit set straight across.
             const char *const eightBitStart = eightBit;
             const char *const eightBitEnd = eightBit + charCount;

             for (; (eightBit < eightBitEnd) && (buffer < bufferEnd) && (0 == (*eightBit & 0x80)); ++eightBit, ++buffer) *buffer = *eightBit;

             pending.location = eightBit - eightBitStart;
         }

         // Convert whatever is left in chunks that fit the stack buffer.
         while ((pending.location < charCount) && (buffer < bufferEnd)) {
             const CFIndex remaining = charCount - pending.location;
             pending.length = (remaining > MAX_STACK_BUFFER_LEN) ? MAX_STACK_BUFFER_LEN : remaining;

             CFStringGetCharacters(string, pending, chunk);

             // Back up for a high surrogate so a pair is not split across two chunks
             if ((MAX_STACK_BUFFER_LEN == pending.length) && CFUniCharIsSurrogateHighCharacter(chunk[MAX_STACK_BUFFER_LEN - 1])) pending.length -= 1;

             if (!CFUniCharDecompose(chunk, pending.length, NULL, (void *)buffer, bufferEnd - buffer, &bytesEmitted, true, kCFUniCharUTF8Format, true)) return false;

             buffer += bytesEmitted;
             pending.location += pending.length;
         }
     }

     // No room left for the terminator. Since the filename has its own limit, this is ok for now
     if (buffer >= bufferEnd) return false;

     *buffer = '\0';
     if (_CFExecutableLinkedOnOrAfter(CFSystemVersionLion)) {
         while (scan < buffer) {
             if (*scan++ != 0) continue;
             // There's a zero in there. Now see if the rest are all zeroes.
             while (scan < buffer) {
                 if (*scan++ != 0) return false; // Embedded NULLs should cause failure: <rdar://problem/5863219>
             }
         }
     }
     return true;
 #else
     return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
 #endif
 }

884

 /* Variant of CFStringGetFileSystemRepresentation() that accepts a uint8_t
    buffer; the buffer pointer is cast to char * and the call is forwarded unchanged.
 */
 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
     char *const charBuffer = (char *)buffer;
     return CFStringGetFileSystemRepresentation(string, charBuffer, maxBufLen);
 }

888

889

890 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)

891

892 /* This function is used to obtain users' default script/region code.

893 The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.

894 */

 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {

896 char *stringValue;

897 char buffer[__kCFMaxDefaultEncodingFileLength];

898 int uid = getuid();

899

     if ((stringValue = (char *)__CFgetenv(__kCFUserEncodingEnvVariableName)) != NULL) {

         if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {

902 ++stringValue;

903 } else {

904 stringValue = NULL;

905 }

906 }

907

     if ((stringValue == NULL) && ((uid > 0) || __CFgetenv("HOME"))) {

         char passwdExtraBuf[1000 + MAXPATHLEN];  // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...

910 struct passwd passwdBuf, *passwdp = NULL;

911

         switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {

913 case 0: // Success

914 break;

915 case ERANGE: // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609

                 passwdp = getpwuid((uid_t)uid); 

917 break;

918 default:

919 passwdp = NULL;

920 }

921 if (passwdp) {

             char filename[MAXPATHLEN + 1];

923

924 const char *path = NULL;

925 if (!issetugid()) {

                 path = __CFgetenv("CFFIXED_USER_HOME");

927 }

928 if (!path) {

929 path = passwdp->pw_dir;

930 }

931

             strlcpy(filename, path, sizeof(filename));

             strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));

934

             int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;

             int fd = open(filename, O_RDONLY, 0);

             if (fd == -1) {

938 // Cannot open the file. Let's fallback to smRoman/verUS

                 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);

                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);

941 } else {

942 ssize_t readSize;

                 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);

                 buffer[(readSize < 0 ? 0 : readSize)] = '\0';

945 close(fd);

946 stringValue = buffer;

947

948 // Well, we already have a buffer, let's reuse it

                 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);

                 setenv(__kCFUserEncodingEnvVariableName, filename, 1);

951 }

             if (-1 != no_hang_fd) close(no_hang_fd);

953 }

954 }

955

956 if (stringValue) {

         *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);

958 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew

         if ((*oScriptValue == kCFStringEncodingMacArabic) || (*oScriptValue == kCFStringEncodingMacHebrew)) *oScriptValue = kCFStringEncodingMacRoman;

         if (*stringValue == ':') {

             if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);

962 return;

963 }

964 }

965

966 // Falling back

     *oScriptValue = 0; // smRoman

     if (oRegionValue) *oRegionValue = 0; // verUS

969 }

970

 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {

972 char buffer[__kCFMaxDefaultEncodingFileLength];

973 char *stringValue = NULL;

974

975 *encoding = 0;

976 *region = 0;

977

     struct passwd *passwdp = getpwuid((uid_t)0);

979 if (passwdp) {

         const char *path = passwdp->pw_dir;

981

         char filename[MAXPATHLEN + 1];

         strlcpy(filename, path, sizeof(filename));

         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));

985

         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;

         int fd = open(filename, O_RDONLY, 0);

         if (0 <= fd) {

             ssize_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);

             buffer[(size < 0 ? 0 : size)] = '\0';

991 close(fd);

992 stringValue = buffer;

993 }

         if (-1 != no_hang_fd) close(no_hang_fd);

995 }

996

997 if (stringValue) {

         *encoding = strtol_l(stringValue, &stringValue, 0, NULL);

999 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew

         if ((*encoding == kCFStringEncodingMacArabic) || (*encoding == kCFStringEncodingMacHebrew)) *encoding = kCFStringEncodingMacRoman;

         if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);

1002 }

1003 }

1004

1005 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {

1006 Boolean success = false;

     struct passwd *passwdp = getpwuid(getuid());

1008 if (passwdp) {

         const char *path = passwdp->pw_dir;

1010 if (!issetugid()) {

             const char *value = __CFgetenv("CFFIXED_USER_HOME");

             if (value) path = value; // override

1013 }

1014

         char filename[MAXPATHLEN + 1];

         strlcpy(filename, path, sizeof(filename));

         strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));

1018

         int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;

         (void)unlink(filename);

         int fd = open(filename, O_WRONLY|O_CREAT, 0400);

         if (0 <= fd) {

1023 char buffer[__kCFMaxDefaultEncodingFileLength];

1024 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew

             if ((iScriptValue == kCFStringEncodingMacArabic) || (iScriptValue == kCFStringEncodingMacHebrew)) iScriptValue = kCFStringEncodingMacRoman;

             size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);

1027 if (size <= __kCFMaxDefaultEncodingFileLength) {

                 int ret = write(fd, buffer, size);

                 if (size <= ret) success = true;

1030 }

1031 int save_err = errno;

1032 close(fd);

1033 errno = save_err;

1034 }

1035 int save_err = errno;

         if (-1 != no_hang_fd) close(no_hang_fd);

1037 errno = save_err;

1038 }

1039 return success;

1040 }

1041

1042 #endif

1043