]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodings.c
876e204ed8dfdd782a1eddcf4deffec72eb72d11
[apple/cf.git] / CFStringEncodings.c
1 /*
2 * Copyright (c) 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringEncodings.c
25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFString.h>
31 #include <CoreFoundation/CFByteOrder.h>
32 #include <CoreFoundation/CFPriv.h>
33 #include <string.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
38 #include <stdlib.h>
39 #include <fcntl.h>
40 #include <pwd.h>
41 #include <sys/param.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <stdio.h>
45 #include <xlocale.h>
46 #include <CoreFoundation/CFStringDefaultEncoding.h>
47 #endif
48
49 static bool __CFWantsToUseASCIICompatibleConversion = false;
50 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) { return __CFWantsToUseASCIICompatibleConversion; }
51
52 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
53 __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
54 }
55
56 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
57
58 // To avoid early initialization issues, we just initialize this here
59 // This should not be const as it is changed
60 CF_PRIVATE UniChar __CFCharToUniCharTable[256] = {
61 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
62 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
63 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
64 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
65 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
66 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
67 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
68 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
69 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
70 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
71 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
72 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
73 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
74 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
75 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
76 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
77 };
78
79 CF_PRIVATE void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
80 if (__CFCharToUniCharFunc != func) {
81 int ch;
82 __CFCharToUniCharFunc = func;
83 if (func) {
84 for (ch = 128; ch < 256; ch++) {
85 UniChar uch;
86 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
87 }
88 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
89 for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
90 }
91 }
92 }
93
94 CF_PRIVATE void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
95 CFIndex idx;
96 for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
97 }
98
99
/* Length, in UniChars, of the scratch buffer used when a string is converted piece by piece.
*/
#define kCFCharConversionBufferLength 512


/* Capacity of the inline storage of a CFVarWidthCharBuffer, counted in 8-bit characters and in
   UniChars respectively. Both expand to an expression that names `buffer`, so a pointer with that
   name must be in scope wherever they are used.
*/
#define MAX_LOCAL_CHARS		(sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS	(sizeof(buffer->localBuffer) / sizeof(UniChar))
107
108 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
109 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
110 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
111 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
112 !!! converterFlags is only used for the UTF8 converter at this point
113 */
114 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
115 return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
116 }
117
/* States of the kCFStringEncodingNonLossyASCII decoder in __CFStringDecodeByteStream3().
   The decoder bumps the state by one per consumed digit, so the numeric distance between an
   "initial" and a "final" state is the number of digits still expected in that state:
   four hex digits after \u, and two more octal digits after the first one that follows the backslash. */
enum {
    __NSNonLossyErrorMode        = -1,                                /* malformed input; decoding stops */
    __NSNonLossyASCIIMode        = 0,                                 /* plain characters; a character is emitted on entering this state */
    __NSNonLossyBackslashMode    = 1,                                 /* just saw a backslash */
    __NSNonLossyHexInitialMode   = __NSNonLossyBackslashMode + 1,     /* 2: after \u or \U */
    __NSNonLossyHexFinalMode     = __NSNonLossyHexInitialMode + 4,    /* 6: all four hex digits read */
    __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,      /* 7: first octal digit read */
    __NSNonLossyOctalFinalMode   = __NSNonLossyOctalInitialMode + 2   /* 9: all three octal digits read */
};
127
128 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
129 CFIndex idx;
130 const uint8_t *chars = (const uint8_t *)bytes;
131 const uint8_t *end = chars + len;
132 Boolean result = TRUE;
133
134 if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
135
136 buffer->isASCII = !alwaysUnicode;
137 buffer->shouldFreeChars = false;
138 buffer->numChars = 0;
139
140 if (0 == len) return true;
141
142 buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
143
144 if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
145 const UTF16Char *src = (const UTF16Char *)bytes;
146 const UTF16Char *limit = src + (len / sizeof(UTF16Char)); // <rdar://problem/7854378> avoiding odd len issue
147 bool swap = false;
148
149 if (kCFStringEncodingUTF16 == encoding) {
150 UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
151
152 #if __CF_BIG_ENDIAN__
153 if (bom == 0xFFFE) swap = true;
154 #else
155 if (bom != 0xFEFF) swap = true;
156 #endif
157 if (bom) useClientsMemoryPtr = NULL;
158 } else {
159 #if __CF_BIG_ENDIAN__
160 if (kCFStringEncodingUTF16LE == encoding) swap = true;
161 #else
162 if (kCFStringEncodingUTF16BE == encoding) swap = true;
163 #endif
164 }
165
166 buffer->numChars = limit - src;
167
168 if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
169 *useClientsMemoryPtr = true;
170 buffer->chars.unicode = (UniChar *)src;
171 buffer->isASCII = false;
172 } else {
173 if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
174 const UTF16Char *characters = src;
175 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
176
177 while (characters < limit) {
178 if (*(characters++) & mask) {
179 buffer->isASCII = false;
180 break;
181 }
182 }
183 }
184
185 if (buffer->isASCII) {
186 uint8_t *dst;
187 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
188 if (buffer->numChars > MAX_LOCAL_CHARS) {
189 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
190 if (!buffer->chars.ascii) goto memoryErrorExit;
191 buffer->shouldFreeChars = true;
192 } else {
193 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
194 }
195 }
196 dst = buffer->chars.ascii;
197
198 if (swap) {
199 while (src < limit) *(dst++) = (*(src++) >> 8);
200 } else {
201 while (src < limit) *(dst++) = (uint8_t)*(src++);
202 }
203 } else {
204 UTF16Char *dst;
205
206 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
207 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
208 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
209 if (!buffer->chars.unicode) goto memoryErrorExit;
210 buffer->shouldFreeChars = true;
211 } else {
212 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
213 }
214 }
215 dst = buffer->chars.unicode;
216
217 if (swap) {
218 while (src < limit) *(dst++) = CFSwapInt16(*(src++));
219 } else {
220 memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
221 }
222 }
223 }
224 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
225 const UTF32Char *src = (const UTF32Char *)bytes;
226 const UTF32Char *limit = src + (len / sizeof(UTF32Char)); // <rdar://problem/7854378> avoiding odd len issue
227 bool swap = false;
228 static bool strictUTF32 = (bool)-1;
229
230 if ((bool)-1 == strictUTF32) strictUTF32 = (1 != 0);
231
232 if (kCFStringEncodingUTF32 == encoding) {
233 UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
234
235 #if __CF_BIG_ENDIAN__
236 if (bom == 0xFFFE0000) swap = true;
237 #else
238 if (bom != 0x0000FEFF) swap = true;
239 #endif
240 } else {
241 #if __CF_BIG_ENDIAN__
242 if (kCFStringEncodingUTF32LE == encoding) swap = true;
243 #else
244 if (kCFStringEncodingUTF32BE == encoding) swap = true;
245 #endif
246 }
247
248 buffer->numChars = limit - src;
249
250 {
251 // Let's see if we have non-ASCII or non-BMP
252 const UTF32Char *characters = src;
253 UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
254 UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
255
256 while (characters < limit) {
257 if (*characters & asciiMask) {
258 buffer->isASCII = false;
259 if (*characters & bmpMask) {
260 if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
261 ++(buffer->numChars);
262 }
263 }
264 ++characters;
265 }
266 }
267
268 if (buffer->isASCII) {
269 uint8_t *dst;
270 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
271 if (buffer->numChars > MAX_LOCAL_CHARS) {
272 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
273 if (!buffer->chars.ascii) goto memoryErrorExit;
274 buffer->shouldFreeChars = true;
275 } else {
276 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
277 }
278 }
279 dst = buffer->chars.ascii;
280
281 if (swap) {
282 while (src < limit) *(dst++) = (*(src++) >> 24);
283 } else {
284 while (src < limit) *(dst++) = *(src++);
285 }
286 } else {
287 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
288 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
289 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
290 if (!buffer->chars.unicode) goto memoryErrorExit;
291 buffer->shouldFreeChars = true;
292 } else {
293 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
294 }
295 }
296 result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
297 }
298 } else if (kCFStringEncodingUTF8 == encoding) {
299 if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
300 chars += 3;
301 len -= 3;
302 if (0 == len) return true;
303 }
304 if (buffer->isASCII) {
305 for (idx = 0; idx < len; idx++) {
306 if (128 <= chars[idx]) {
307 buffer->isASCII = false;
308 break;
309 }
310 }
311 }
312 if (buffer->isASCII) {
313 buffer->numChars = len;
314 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
315 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
316 if (!buffer->chars.ascii) goto memoryErrorExit;
317 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
318 } else {
319 CFIndex numDone;
320 static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
321
322 if (!__CFFromUTF8) {
323 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
324 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
325 }
326
327 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
328 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
329 if (!buffer->chars.unicode) goto memoryErrorExit;
330 buffer->numChars = 0;
331 while (chars < end) {
332 numDone = 0;
333 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
334
335 if (0 == numDone) {
336 result = FALSE;
337 break;
338 }
339 buffer->numChars += numDone;
340 }
341 }
342 } else if (kCFStringEncodingNonLossyASCII == encoding) {
343 UTF16Char currentValue = 0;
344 uint8_t character;
345 int8_t mode = __NSNonLossyASCIIMode;
346
347 buffer->isASCII = false;
348 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
349 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
350 if (!buffer->chars.unicode) goto memoryErrorExit;
351 buffer->numChars = 0;
352
353 while (chars < end) {
354 character = (*chars++);
355
356 switch (mode) {
357 case __NSNonLossyASCIIMode:
358 if (character == '\\') {
359 mode = __NSNonLossyBackslashMode;
360 } else if (character < 0x80) {
361 currentValue = character;
362 } else {
363 mode = __NSNonLossyErrorMode;
364 }
365 break;
366
367 case __NSNonLossyBackslashMode:
368 if ((character == 'U') || (character == 'u')) {
369 mode = __NSNonLossyHexInitialMode;
370 currentValue = 0;
371 } else if ((character >= '0') && (character <= '9')) {
372 mode = __NSNonLossyOctalInitialMode;
373 currentValue = character - '0';
374 } else if (character == '\\') {
375 mode = __NSNonLossyASCIIMode;
376 currentValue = character;
377 } else {
378 mode = __NSNonLossyErrorMode;
379 }
380 break;
381
382 default:
383 if (mode < __NSNonLossyHexFinalMode) {
384 if ((character >= '0') && (character <= '9')) {
385 currentValue = (currentValue << 4) | (character - '0');
386 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
387 } else {
388 if (character >= 'a') character -= ('a' - 'A');
389 if ((character >= 'A') && (character <= 'F')) {
390 currentValue = (currentValue << 4) | ((character - 'A') + 10);
391 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
392 } else {
393 mode = __NSNonLossyErrorMode;
394 }
395 }
396 } else {
397 if ((character >= '0') && (character <= '9')) {
398 currentValue = (currentValue << 3) | (character - '0');
399 if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
400 } else {
401 mode = __NSNonLossyErrorMode;
402 }
403 }
404 break;
405 }
406
407 if (mode == __NSNonLossyASCIIMode) {
408 buffer->chars.unicode[buffer->numChars++] = currentValue;
409 } else if (mode == __NSNonLossyErrorMode) {
410 break;
411 }
412 }
413 result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
414 } else {
415 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
416
417 if (!converter) return false;
418
419 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
420
421 if (!isASCIISuperset) buffer->isASCII = false;
422
423 if (buffer->isASCII) {
424 for (idx = 0; idx < len; idx++) {
425 if (128 <= chars[idx]) {
426 buffer->isASCII = false;
427 break;
428 }
429 }
430 }
431
432 if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
433 if (buffer->isASCII) {
434 buffer->numChars = len;
435 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
436 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
437 if (!buffer->chars.ascii) goto memoryErrorExit;
438 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
439 } else {
440 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
441 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
442 if (!buffer->chars.unicode) goto memoryErrorExit;
443 buffer->numChars = len;
444 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
445 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
446 } else {
447 for (idx = 0; idx < len; idx++) {
448 if (chars[idx] < 0x80 && isASCIISuperset) {
449 buffer->chars.unicode[idx] = (UniChar)chars[idx];
450 } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
451 result = FALSE;
452 break;
453 }
454 }
455 }
456 }
457 } else {
458 if (buffer->isASCII) {
459 buffer->numChars = len;
460 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
461 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
462 if (!buffer->chars.ascii) goto memoryErrorExit;
463 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
464 } else {
465 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
466 static UInt32 lossyFlag = (UInt32)-1;
467
468 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
469 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
470 if (!buffer->chars.unicode) goto memoryErrorExit;
471
472 if (lossyFlag == (UInt32)-1) lossyFlag = 0;
473
474 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
475 }
476 }
477 }
478
479 if (FALSE == result) {
480 memoryErrorExit: // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
481 result = FALSE; // In case we come here from a goto
482 if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
483 buffer->isASCII = !alwaysUnicode;
484 buffer->shouldFreeChars = false;
485 buffer->chars.ascii = NULL;
486 buffer->numChars = 0;
487 }
488 return result;
489 }
490
491
492 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
493 into a fixed size buffer. Returns number of characters converted.
494 Characters that cannot be converted to the specified encoding are represented
495 with the char specified by lossByte; if 0, then lossy conversion is not allowed
496 and conversion stops, returning partial results.
497 Pass buffer==NULL if you don't care about the converted string (but just the convertability,
498 or number of bytes required, indicated by usedBufLen).
499 Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
500
501 Note: This function is intended to work through CFString functions, so it should work
502 with NSStrings as well as CFStrings.
503 */
504 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
505 CFIndex totalBytesWritten = 0; /* Number of written bytes */
506 CFIndex numCharsProcessed = 0; /* Number of processed chars */
507 const UniChar *unichars;
508
509 if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
510 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
511
512 if (!__CFToUTF8) {
513 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
514 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
515 }
516 numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
517
518 } else if (encoding == kCFStringEncodingNonLossyASCII) {
519 const char *hex = "0123456789abcdef";
520 UniChar ch;
521 CFStringInlineBuffer buf;
522 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
523 while (numCharsProcessed < rangeLen) {
524 CFIndex reqLength; /* Required number of chars to encode this UniChar */
525 CFIndex cnt;
526 char tmp[6];
527 ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
528 if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
529 reqLength = 1;
530 tmp[0] = (char)ch;
531 } else {
532 if (ch == '\\') {
533 tmp[1] = '\\';
534 reqLength = 2;
535 } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
536 tmp[1] = '0' + (ch >> 6);
537 tmp[2] = '0' + ((ch >> 3) & 7);
538 tmp[3] = '0' + (ch & 7);
539 reqLength = 4;
540 } else { /* \Unnnn */
541 tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
542 tmp[2] = hex[(ch >> 12) & 0x0f];
543 tmp[3] = hex[(ch >> 8) & 0x0f];
544 tmp[4] = hex[(ch >> 4) & 0x0f];
545 tmp[5] = hex[ch & 0x0f];
546 reqLength = 6;
547 }
548 tmp[0] = '\\';
549 }
550 if (buffer) {
551 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
552 .*/
553 for (cnt = 0; cnt < reqLength; cnt++) {
554 buffer[totalBytesWritten + cnt] = tmp[cnt];
555 }
556 }
557 totalBytesWritten += reqLength;
558 numCharsProcessed++;
559 }
560 } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
561 CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
562 numCharsProcessed = rangeLen;
563 if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
564 numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
565 }
566 totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
567 if (buffer) {
568 if (extraForBOM) { /* Generate BOM */
569 #if __CF_BIG_ENDIAN__
570 *buffer++ = 0xfe; *buffer++ = 0xff;
571 #else
572 *buffer++ = 0xff; *buffer++ = 0xfe;
573 #endif
574 }
575 CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
576 if ((__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
577 UTF16Char *characters = (UTF16Char *)buffer;
578 const UTF16Char *limit = characters + numCharsProcessed;
579
580 while (characters < limit) {
581 *characters = CFSwapInt16(*characters);
582 ++characters;
583 }
584 }
585 }
586 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
587 UTF32Char character;
588 CFStringInlineBuffer buf;
589 UTF32Char *characters = (UTF32Char *)buffer;
590
591 bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
592 if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
593 totalBytesWritten += sizeof(UTF32Char);
594 if (characters) {
595 if (totalBytesWritten > max) { // insufficient buffer
596 totalBytesWritten = 0;
597 } else {
598 *(characters++) = 0x0000FEFF;
599 }
600 }
601 }
602
603 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
604 while (numCharsProcessed < rangeLen) {
605 character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
606
607 if (CFUniCharIsSurrogateHighCharacter(character)) {
608 UTF16Char otherCharacter;
609
610 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
611 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
612 } else if (lossByte) {
613 character = lossByte;
614 } else {
615 break;
616 }
617 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
618 if (lossByte) {
619 character = lossByte;
620 } else {
621 break;
622 }
623 }
624
625 totalBytesWritten += sizeof(UTF32Char);
626
627 if (characters) {
628 if (totalBytesWritten > max) {
629 totalBytesWritten -= sizeof(UTF32Char);
630 break;
631 }
632 *(characters++) = (swap ? CFSwapInt32(character) : character);
633 }
634
635 numCharsProcessed += (character > 0xFFFF ? 2 : 1);
636 }
637 } else {
638 CFIndex numChars;
639 UInt32 flags;
640 const unsigned char *cString = NULL;
641 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
642
643 if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
644
645 if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
646 const unsigned char *ptr;
647 if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
648 ptr = (cString += rangeLoc);
649 if (__CFStringGetEightBitStringEncoding() == encoding) {
650 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
651 if (buffer) memmove(buffer, cString, numCharsProcessed);
652 if (usedBufLen) *usedBufLen = numCharsProcessed;
653 return numCharsProcessed;
654 }
655
656 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
657 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
658 ++ptr;
659 --rangeLen;
660 }
661 numCharsProcessed = ptr - cString;
662 if (buffer) {
663 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
664 memmove(buffer, cString, numCharsProcessed);
665 buffer += numCharsProcessed;
666 max -= numCharsProcessed;
667 }
668 if (!rangeLen || (buffer && (max == 0))) {
669 if (usedBufLen) *usedBufLen = numCharsProcessed;
670 return numCharsProcessed;
671 }
672 rangeLoc += numCharsProcessed;
673 totalBytesWritten += numCharsProcessed;
674 }
675 if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
676 ptr = (cString += (rangeLoc + 1));
677 if (__CFStringGetEightBitStringEncoding() == encoding) {
678 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
679 if (buffer) memmove(buffer, cString, numCharsProcessed);
680 if (usedBufLen) *usedBufLen = numCharsProcessed;
681 return numCharsProcessed;
682 }
683 while (*ptr < 0x80 && rangeLen > 0) {
684 ++ptr;
685 --rangeLen;
686 }
687 numCharsProcessed = ptr - cString;
688 if (buffer) {
689 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
690 memmove(buffer, cString, numCharsProcessed);
691 buffer += numCharsProcessed;
692 max -= numCharsProcessed;
693 }
694 if (!rangeLen || (buffer && (max == 0))) {
695 if (usedBufLen) *usedBufLen = numCharsProcessed;
696 return numCharsProcessed;
697 }
698 rangeLoc += numCharsProcessed;
699 totalBytesWritten += numCharsProcessed;
700 }
701 }
702
703 if (!buffer) max = 0;
704
705 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
706 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
707 flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
708
709 if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
710 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
711 } else {
712 UniChar charBuf[kCFCharConversionBufferLength];
713 CFIndex currentLength;
714 CFIndex usedLen;
715 CFIndex lastUsedLen = 0, lastNumChars = 0;
716 uint32_t result;
717 uint32_t streamingMask;
718 uint32_t streamID = 0;
719 #define MAX_DECOMP_LEN (6)
720
721 while (rangeLen > 0) {
722 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
723 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
724
725 // could be in the middle of surrogate pair; back up.
726 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
727
728 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
729
730 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
731 streamID = CFStringEncodingStreamIDFromMask(result);
732 result &= ~CFStringEncodingStreamIDMask;
733
734 if (result != kCFStringEncodingConversionSuccess) {
735 if (kCFStringEncodingInvalidInputStream == result) {
736 CFRange composedRange;
737 // Check the tail
738 if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
739 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
740
741 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
742 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
743 streamID = CFStringEncodingStreamIDFromMask(result);
744 result &= ~CFStringEncodingStreamIDMask;
745 }
746 }
747
748 // Check the head
749 if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
750 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
751
752 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
753 // Try if the composed range can be converted
754 CFStringGetCharacters(string, composedRange, charBuf);
755
756 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
757 CFIndex lastRangeLoc = rangeLoc - lastNumChars;
758
759 currentLength = composedRange.location - lastRangeLoc;
760 CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
761
762 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
763 streamID = CFStringEncodingStreamIDFromMask(result);
764 result &= ~CFStringEncodingStreamIDMask;
765
766 if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
767 // Looks good. back up
768 totalBytesWritten -= lastUsedLen;
769 numCharsProcessed -= lastNumChars;
770
771 rangeLoc = lastRangeLoc;
772 rangeLen += lastNumChars;
773
774 if (max) {
775 buffer -= lastUsedLen;
776 max += lastUsedLen;
777 }
778 }
779 }
780 }
781 }
782 }
783
784 if (kCFStringEncodingConversionSuccess != result) { // really failed
785 totalBytesWritten += usedLen;
786 numCharsProcessed += numChars;
787 break;
788 }
789 }
790
791 totalBytesWritten += usedLen;
792 numCharsProcessed += numChars;
793
794 rangeLoc += numChars;
795 rangeLen -= numChars;
796 if (max) {
797 buffer += usedLen;
798 max -= usedLen;
799 if (max <= 0) break;
800 }
801 lastUsedLen = usedLen; lastNumChars = numChars;
802 flags &= ~kCFStringEncodingPrependBOM;
803 }
804 }
805 }
806 if (usedBufLen) *usedBufLen = totalBytesWritten;
807 return numCharsProcessed;
808 }
809
810 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
811 return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
812 }
813
814 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
815 CFIndex len = CFStringGetLength(string);
816 CFStringEncoding enc = CFStringGetFastestEncoding(string);
817 switch (enc) {
818 case kCFStringEncodingASCII:
819 case kCFStringEncodingMacRoman:
820 if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound; // Avoid wrap-around
821 return len * 3L + 1L;
822 default:
823 if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound; // Avoid wrap-around
824 return len * 9L + 1L;
825 }
826 }
827
828 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
829 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
830 #define MAX_STACK_BUFFER_LEN (255)
831 const UTF16Char *characters = CFStringGetCharactersPtr(string);
832 const char *origBuffer = buffer;
833 const char *bufferLimit = buffer + maxBufLen;
834 CFIndex length = CFStringGetLength(string);
835 CFIndex usedBufLen;
836
837 if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
838
839 if (NULL == characters) {
840 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
841 CFRange range = CFRangeMake(0, 0);
842 const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
843
844 if (NULL != bytes) {
845 const char *originalBytes = bytes;
846 const char *bytesLimit = bytes + length;
847
848 while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
849
850 range.location = bytes - originalBytes;
851 }
852 while ((range.location < length) && (buffer < bufferLimit)) {
853 range.length = length - range.location;
854 if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
855
856 CFStringGetCharacters(string, range, charactersBuffer);
857 if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
858
859 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
860
861 buffer += usedBufLen;
862 range.location += range.length;
863 }
864 } else {
865 if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
866 buffer += usedBufLen;
867 }
868
869 if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
870 *buffer = '\0';
871 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionLion)) {
872 while (origBuffer < buffer) if (*origBuffer++ == 0) { // There's a zero in there. Now see if the rest are all zeroes.
873 while (origBuffer < buffer) if (*origBuffer++ != 0) return false; // Embedded NULLs should cause failure: <rdar://problem/5863219>
874 }
875 }
876 return true;
877 } else {
878 return false;
879 }
880 #else
881 return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
882 #endif
883 }
884
885 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
886 return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
887 }
888
889
890 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
891
892 /* This function is used to obtain users' default script/region code.
893 The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
894 */
895 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
896 char *stringValue;
897 char buffer[__kCFMaxDefaultEncodingFileLength];
898 int uid = getuid();
899
900 if ((stringValue = (char *)__CFgetenv(__kCFUserEncodingEnvVariableName)) != NULL) {
901 if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
902 ++stringValue;
903 } else {
904 stringValue = NULL;
905 }
906 }
907
908 if ((stringValue == NULL) && ((uid > 0) || __CFgetenv("HOME"))) {
909 char passwdExtraBuf[1000 + MAXPATHLEN]; // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
910 struct passwd passwdBuf, *passwdp = NULL;
911
912 switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
913 case 0: // Success
914 break;
915 case ERANGE: // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
916 passwdp = getpwuid((uid_t)uid);
917 break;
918 default:
919 passwdp = NULL;
920 }
921 if (passwdp) {
922 char filename[MAXPATHLEN + 1];
923
924 const char *path = NULL;
925 if (!issetugid()) {
926 path = __CFgetenv("CFFIXED_USER_HOME");
927 }
928 if (!path) {
929 path = passwdp->pw_dir;
930 }
931
932 strlcpy(filename, path, sizeof(filename));
933 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
934
935 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
936 int fd = open(filename, O_RDONLY, 0);
937 if (fd == -1) {
938 // Cannot open the file. Let's fallback to smRoman/verUS
939 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
940 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
941 } else {
942 ssize_t readSize;
943 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
944 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
945 close(fd);
946 stringValue = buffer;
947
948 // Well, we already have a buffer, let's reuse it
949 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
950 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
951 }
952 if (-1 != no_hang_fd) close(no_hang_fd);
953 }
954 }
955
956 if (stringValue) {
957 *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
958 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew
959 if ((*oScriptValue == kCFStringEncodingMacArabic) || (*oScriptValue == kCFStringEncodingMacHebrew)) *oScriptValue = kCFStringEncodingMacRoman;
960 if (*stringValue == ':') {
961 if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
962 return;
963 }
964 }
965
966 // Falling back
967 *oScriptValue = 0; // smRoman
968 if (oRegionValue) *oRegionValue = 0; // verUS
969 }
970
971 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
972 char buffer[__kCFMaxDefaultEncodingFileLength];
973 char *stringValue = NULL;
974
975 *encoding = 0;
976 *region = 0;
977
978 struct passwd *passwdp = getpwuid((uid_t)0);
979 if (passwdp) {
980 const char *path = passwdp->pw_dir;
981
982 char filename[MAXPATHLEN + 1];
983 strlcpy(filename, path, sizeof(filename));
984 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
985
986 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
987 int fd = open(filename, O_RDONLY, 0);
988 if (0 <= fd) {
989 ssize_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
990 buffer[(size < 0 ? 0 : size)] = '\0';
991 close(fd);
992 stringValue = buffer;
993 }
994 if (-1 != no_hang_fd) close(no_hang_fd);
995 }
996
997 if (stringValue) {
998 *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
999 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew
1000 if ((*encoding == kCFStringEncodingMacArabic) || (*encoding == kCFStringEncodingMacHebrew)) *encoding = kCFStringEncodingMacRoman;
1001 if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
1002 }
1003 }
1004
1005 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1006 Boolean success = false;
1007 struct passwd *passwdp = getpwuid(getuid());
1008 if (passwdp) {
1009 const char *path = passwdp->pw_dir;
1010 if (!issetugid()) {
1011 const char *value = __CFgetenv("CFFIXED_USER_HOME");
1012 if (value) path = value; // override
1013 }
1014
1015 char filename[MAXPATHLEN + 1];
1016 strlcpy(filename, path, sizeof(filename));
1017 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1018
1019 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1020 (void)unlink(filename);
1021 int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1022 if (0 <= fd) {
1023 char buffer[__kCFMaxDefaultEncodingFileLength];
1024 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew
1025 if ((iScriptValue == kCFStringEncodingMacArabic) || (iScriptValue == kCFStringEncodingMacHebrew)) iScriptValue = kCFStringEncodingMacRoman;
1026 size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1027 if (size <= __kCFMaxDefaultEncodingFileLength) {
1028 int ret = write(fd, buffer, size);
1029 if (size <= ret) success = true;
1030 }
1031 int save_err = errno;
1032 close(fd);
1033 errno = save_err;
1034 }
1035 int save_err = errno;
1036 if (-1 != no_hang_fd) close(no_hang_fd);
1037 errno = save_err;
1038 }
1039 return success;
1040 }
1041
1042 #endif
1043