]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodings.c
CF-635.21.tar.gz
[apple/cf.git] / CFStringEncodings.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringEncodings.c
25 Copyright (c) 1999-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFString.h>
31 #include <CoreFoundation/CFByteOrder.h>
32 #include <CoreFoundation/CFPriv.h>
33 #include <string.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
38 #include <stdlib.h>
39 #include <fcntl.h>
40 #include <pwd.h>
41 #include <sys/param.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <stdio.h>
45 #include <xlocale.h>
46 #include <CoreFoundation/CFStringDefaultEncoding.h>
47 #endif
48
49 static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
50 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
51 if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
52 __CFWantsToUseASCIICompatibleConversion = false;
53 }
54 return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
55 }
56
57 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
58 __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
59 }
60
61 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
62
63 // To avoid early initialization issues, we just initialize this here
64 // This should not be const as it is changed
65 __private_extern__ UniChar __CFCharToUniCharTable[256] = {
66 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
67 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
68 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
69 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
70 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
71 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
72 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
73 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
74 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
75 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
76 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
77 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
78 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
79 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
80 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
81 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
82 };
83
84 __private_extern__ void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
85 if (__CFCharToUniCharFunc != func) {
86 int ch;
87 __CFCharToUniCharFunc = func;
88 if (func) {
89 for (ch = 128; ch < 256; ch++) {
90 UniChar uch;
91 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
92 }
93 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
94 for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
95 }
96 }
97 }
98
99 __private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
100 CFIndex idx;
101 for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
102 }
103
104
/* Number of UniChars in the on-stack staging buffer used when a string has to be
   converted piece by piece.
*/
#define kCFCharConversionBufferLength (512)


/* Capacity of a CFVarWidthCharBuffer's inline localBuffer, counted in 8-bit
   characters and in UniChars respectively. Both macros expect a variable named
   `buffer` (pointer to the CFVarWidthCharBuffer) to be in scope at the point of use. */
#define MAX_LOCAL_CHARS ((sizeof(buffer->localBuffer)) / (sizeof(uint8_t)))
#define MAX_LOCAL_UNICHARS ((sizeof(buffer->localBuffer)) / (sizeof(UniChar)))
112
113 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
114 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
115 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
116 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
117 !!! converterFlags is only used for the UTF8 converter at this point
118 */
119 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
120 return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
121 }
122
/* States of the kCFStringEncodingNonLossyASCII decoder in __CFStringDecodeByteStream3().
   The hex and octal states are counters: the decoder increments the mode once per
   digit consumed and returns to ASCII mode when the matching "final" value is reached. */
enum {
    __NSNonLossyErrorMode = -1,        // malformed input; decoding stops
    __NSNonLossyASCIIMode = 0,         // outside any escape sequence
    __NSNonLossyBackslashMode = 1,     // a backslash has just been read
    __NSNonLossyHexInitialMode = 2,    // Backslash + 1: "\u" / "\U" read, no hex digit yet
    __NSNonLossyHexFinalMode = 6,      // HexInitial + 4: reached after the 4th hex digit
    __NSNonLossyOctalInitialMode = 7,  // HexFinal + 1: first octal digit already consumed
    __NSNonLossyOctalFinalMode = 9     // HexFinal + 3: reached after the 3rd octal digit
};
132
133 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
134 CFIndex idx;
135 const uint8_t *chars = (const uint8_t *)bytes;
136 const uint8_t *end = chars + len;
137 Boolean result = TRUE;
138
139 if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
140
141 buffer->isASCII = !alwaysUnicode;
142 buffer->shouldFreeChars = false;
143 buffer->numChars = 0;
144
145 if (0 == len) return true;
146
147 buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
148
149 if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
150 const UTF16Char *src = (const UTF16Char *)bytes;
151 const UTF16Char *limit = src + (len / sizeof(UTF16Char)); // <rdar://problem/7854378> avoiding odd len issue
152 bool swap = false;
153
154 if (kCFStringEncodingUTF16 == encoding) {
155 UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
156
157 #if __CF_BIG_ENDIAN__
158 if (bom == 0xFFFE) swap = true;
159 #else
160 if (bom != 0xFEFF) swap = true;
161 #endif
162 if (bom) useClientsMemoryPtr = NULL;
163 } else {
164 #if __CF_BIG_ENDIAN__
165 if (kCFStringEncodingUTF16LE == encoding) swap = true;
166 #else
167 if (kCFStringEncodingUTF16BE == encoding) swap = true;
168 #endif
169 }
170
171 buffer->numChars = limit - src;
172
173 if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
174 *useClientsMemoryPtr = true;
175 buffer->chars.unicode = (UniChar *)src;
176 buffer->isASCII = false;
177 } else {
178 if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
179 const UTF16Char *characters = src;
180 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
181
182 while (characters < limit) {
183 if (*(characters++) & mask) {
184 buffer->isASCII = false;
185 break;
186 }
187 }
188 }
189
190 if (buffer->isASCII) {
191 uint8_t *dst;
192 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
193 if (buffer->numChars > MAX_LOCAL_CHARS) {
194 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
195 if (!buffer->chars.ascii) goto memoryErrorExit;
196 buffer->shouldFreeChars = true;
197 } else {
198 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
199 }
200 }
201 dst = buffer->chars.ascii;
202
203 if (swap) {
204 while (src < limit) *(dst++) = (*(src++) >> 8);
205 } else {
206 while (src < limit) *(dst++) = (uint8_t)*(src++);
207 }
208 } else {
209 UTF16Char *dst;
210
211 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
212 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
213 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
214 if (!buffer->chars.unicode) goto memoryErrorExit;
215 buffer->shouldFreeChars = true;
216 } else {
217 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
218 }
219 }
220 dst = buffer->chars.unicode;
221
222 if (swap) {
223 while (src < limit) *(dst++) = CFSwapInt16(*(src++));
224 } else {
225 memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
226 }
227 }
228 }
229 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
230 const UTF32Char *src = (const UTF32Char *)bytes;
231 const UTF32Char *limit = src + (len / sizeof(UTF32Char)); // <rdar://problem/7854378> avoiding odd len issue
232 bool swap = false;
233 static bool strictUTF32 = (bool)-1;
234
235 if ((bool)-1 == strictUTF32) strictUTF32 = (_CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) != 0);
236
237 if (kCFStringEncodingUTF32 == encoding) {
238 UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
239
240 #if __CF_BIG_ENDIAN__
241 if (bom == 0xFFFE0000) swap = true;
242 #else
243 if (bom != 0x0000FEFF) swap = true;
244 #endif
245 } else {
246 #if __CF_BIG_ENDIAN__
247 if (kCFStringEncodingUTF32LE == encoding) swap = true;
248 #else
249 if (kCFStringEncodingUTF32BE == encoding) swap = true;
250 #endif
251 }
252
253 buffer->numChars = limit - src;
254
255 {
256 // Let's see if we have non-ASCII or non-BMP
257 const UTF32Char *characters = src;
258 UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
259 UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
260
261 while (characters < limit) {
262 if (*characters & asciiMask) {
263 buffer->isASCII = false;
264 if (*characters & bmpMask) {
265 if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
266 ++(buffer->numChars);
267 }
268 }
269 ++characters;
270 }
271 }
272
273 if (buffer->isASCII) {
274 uint8_t *dst;
275 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
276 if (buffer->numChars > MAX_LOCAL_CHARS) {
277 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
278 if (!buffer->chars.ascii) goto memoryErrorExit;
279 buffer->shouldFreeChars = true;
280 } else {
281 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
282 }
283 }
284 dst = buffer->chars.ascii;
285
286 if (swap) {
287 while (src < limit) *(dst++) = (*(src++) >> 24);
288 } else {
289 while (src < limit) *(dst++) = *(src++);
290 }
291 } else {
292 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
293 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
294 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
295 if (!buffer->chars.unicode) goto memoryErrorExit;
296 buffer->shouldFreeChars = true;
297 } else {
298 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
299 }
300 }
301 result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
302 }
303 } else if (kCFStringEncodingUTF8 == encoding) {
304 if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
305 chars += 3;
306 len -= 3;
307 if (0 == len) return true;
308 }
309 if (buffer->isASCII) {
310 for (idx = 0; idx < len; idx++) {
311 if (128 <= chars[idx]) {
312 buffer->isASCII = false;
313 break;
314 }
315 }
316 }
317 if (buffer->isASCII) {
318 buffer->numChars = len;
319 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
320 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
321 if (!buffer->chars.ascii) goto memoryErrorExit;
322 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
323 } else {
324 CFIndex numDone;
325 static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
326
327 if (!__CFFromUTF8) {
328 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
329 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
330 }
331
332 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
333 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
334 if (!buffer->chars.unicode) goto memoryErrorExit;
335 buffer->numChars = 0;
336 while (chars < end) {
337 numDone = 0;
338 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
339
340 if (0 == numDone) {
341 result = FALSE;
342 break;
343 }
344 buffer->numChars += numDone;
345 }
346 }
347 } else if (kCFStringEncodingNonLossyASCII == encoding) {
348 UTF16Char currentValue = 0;
349 uint8_t character;
350 int8_t mode = __NSNonLossyASCIIMode;
351
352 buffer->isASCII = false;
353 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
354 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
355 if (!buffer->chars.unicode) goto memoryErrorExit;
356 buffer->numChars = 0;
357
358 while (chars < end) {
359 character = (*chars++);
360
361 switch (mode) {
362 case __NSNonLossyASCIIMode:
363 if (character == '\\') {
364 mode = __NSNonLossyBackslashMode;
365 } else if (character < 0x80) {
366 currentValue = character;
367 } else {
368 mode = __NSNonLossyErrorMode;
369 }
370 break;
371
372 case __NSNonLossyBackslashMode:
373 if ((character == 'U') || (character == 'u')) {
374 mode = __NSNonLossyHexInitialMode;
375 currentValue = 0;
376 } else if ((character >= '0') && (character <= '9')) {
377 mode = __NSNonLossyOctalInitialMode;
378 currentValue = character - '0';
379 } else if (character == '\\') {
380 mode = __NSNonLossyASCIIMode;
381 currentValue = character;
382 } else {
383 mode = __NSNonLossyErrorMode;
384 }
385 break;
386
387 default:
388 if (mode < __NSNonLossyHexFinalMode) {
389 if ((character >= '0') && (character <= '9')) {
390 currentValue = (currentValue << 4) | (character - '0');
391 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
392 } else {
393 if (character >= 'a') character -= ('a' - 'A');
394 if ((character >= 'A') && (character <= 'F')) {
395 currentValue = (currentValue << 4) | ((character - 'A') + 10);
396 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
397 } else {
398 mode = __NSNonLossyErrorMode;
399 }
400 }
401 } else {
402 if ((character >= '0') && (character <= '9')) {
403 currentValue = (currentValue << 3) | (character - '0');
404 if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
405 } else {
406 mode = __NSNonLossyErrorMode;
407 }
408 }
409 break;
410 }
411
412 if (mode == __NSNonLossyASCIIMode) {
413 buffer->chars.unicode[buffer->numChars++] = currentValue;
414 } else if (mode == __NSNonLossyErrorMode) {
415 break;
416 }
417 }
418 result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
419 } else {
420 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
421
422 if (!converter) return false;
423
424 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
425
426 if (!isASCIISuperset) buffer->isASCII = false;
427
428 if (buffer->isASCII) {
429 for (idx = 0; idx < len; idx++) {
430 if (128 <= chars[idx]) {
431 buffer->isASCII = false;
432 break;
433 }
434 }
435 }
436
437 if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
438 if (buffer->isASCII) {
439 buffer->numChars = len;
440 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
441 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
442 if (!buffer->chars.ascii) goto memoryErrorExit;
443 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
444 } else {
445 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
446 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
447 if (!buffer->chars.unicode) goto memoryErrorExit;
448 buffer->numChars = len;
449 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
450 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
451 } else {
452 for (idx = 0; idx < len; idx++) {
453 if (chars[idx] < 0x80 && isASCIISuperset) {
454 buffer->chars.unicode[idx] = (UniChar)chars[idx];
455 } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
456 result = FALSE;
457 break;
458 }
459 }
460 }
461 }
462 } else {
463 if (buffer->isASCII) {
464 buffer->numChars = len;
465 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
466 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
467 if (!buffer->chars.ascii) goto memoryErrorExit;
468 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
469 } else {
470 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
471 static UInt32 lossyFlag = (UInt32)-1;
472
473 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
474 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
475 if (!buffer->chars.unicode) goto memoryErrorExit;
476
477 if (lossyFlag == (UInt32)-1) lossyFlag = 0;
478
479 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
480 }
481 }
482 }
483
484 if (FALSE == result) {
485 memoryErrorExit: // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
486 result = FALSE; // In case we come here from a goto
487 if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
488 buffer->isASCII = !alwaysUnicode;
489 buffer->shouldFreeChars = false;
490 buffer->chars.ascii = NULL;
491 buffer->numChars = 0;
492 }
493 return result;
494 }
495
496
497 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
498 into a fixed size buffer. Returns number of characters converted.
499 Characters that cannot be converted to the specified encoding are represented
500 with the char specified by lossByte; if 0, then lossy conversion is not allowed
501 and conversion stops, returning partial results.
502 Pass buffer==NULL if you don't care about the converted string (but just the convertability,
503 or number of bytes required, indicated by usedBufLen).
504 Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
505
506 Note: This function is intended to work through CFString functions, so it should work
507 with NSStrings as well as CFStrings.
508 */
509 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
510 CFIndex totalBytesWritten = 0; /* Number of written bytes */
511 CFIndex numCharsProcessed = 0; /* Number of processed chars */
512 const UniChar *unichars;
513
514 if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
515 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
516
517 if (!__CFToUTF8) {
518 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
519 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
520 }
521 numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
522
523 } else if (encoding == kCFStringEncodingNonLossyASCII) {
524 const char *hex = "0123456789abcdef";
525 UniChar ch;
526 CFStringInlineBuffer buf;
527 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
528 while (numCharsProcessed < rangeLen) {
529 CFIndex reqLength; /* Required number of chars to encode this UniChar */
530 CFIndex cnt;
531 char tmp[6];
532 ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
533 if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
534 reqLength = 1;
535 tmp[0] = (char)ch;
536 } else {
537 if (ch == '\\') {
538 tmp[1] = '\\';
539 reqLength = 2;
540 } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
541 tmp[1] = '0' + (ch >> 6);
542 tmp[2] = '0' + ((ch >> 3) & 7);
543 tmp[3] = '0' + (ch & 7);
544 reqLength = 4;
545 } else { /* \Unnnn */
546 tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
547 tmp[2] = hex[(ch >> 12) & 0x0f];
548 tmp[3] = hex[(ch >> 8) & 0x0f];
549 tmp[4] = hex[(ch >> 4) & 0x0f];
550 tmp[5] = hex[ch & 0x0f];
551 reqLength = 6;
552 }
553 tmp[0] = '\\';
554 }
555 if (buffer) {
556 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
557 .*/
558 for (cnt = 0; cnt < reqLength; cnt++) {
559 buffer[totalBytesWritten + cnt] = tmp[cnt];
560 }
561 }
562 totalBytesWritten += reqLength;
563 numCharsProcessed++;
564 }
565 } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
566 CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
567 numCharsProcessed = rangeLen;
568 if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
569 numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
570 }
571 totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
572 if (buffer) {
573 if (extraForBOM) { /* Generate BOM */
574 #if __CF_BIG_ENDIAN__
575 *buffer++ = 0xfe; *buffer++ = 0xff;
576 #else
577 *buffer++ = 0xff; *buffer++ = 0xfe;
578 #endif
579 }
580 CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
581 if ((__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
582 UTF16Char *characters = (UTF16Char *)buffer;
583 const UTF16Char *limit = characters + numCharsProcessed;
584
585 while (characters < limit) {
586 *characters = CFSwapInt16(*characters);
587 ++characters;
588 }
589 }
590 }
591 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
592 UTF32Char character;
593 CFStringInlineBuffer buf;
594 UTF32Char *characters = (UTF32Char *)buffer;
595
596 bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
597 if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
598 totalBytesWritten += sizeof(UTF32Char);
599 if (characters) {
600 if (totalBytesWritten > max) { // insufficient buffer
601 totalBytesWritten = 0;
602 } else {
603 *(characters++) = 0x0000FEFF;
604 }
605 }
606 }
607
608 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
609 while (numCharsProcessed < rangeLen) {
610 character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
611
612 if (CFUniCharIsSurrogateHighCharacter(character)) {
613 UTF16Char otherCharacter;
614
615 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
616 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
617 } else if (lossByte) {
618 character = lossByte;
619 } else {
620 break;
621 }
622 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
623 if (lossByte) {
624 character = lossByte;
625 } else {
626 break;
627 }
628 }
629
630 totalBytesWritten += sizeof(UTF32Char);
631
632 if (characters) {
633 if (totalBytesWritten > max) {
634 totalBytesWritten -= sizeof(UTF32Char);
635 break;
636 }
637 *(characters++) = (swap ? CFSwapInt32(character) : character);
638 }
639
640 numCharsProcessed += (character > 0xFFFF ? 2 : 1);
641 }
642 } else {
643 CFIndex numChars;
644 UInt32 flags;
645 const unsigned char *cString = NULL;
646 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
647
648 if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
649
650 if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
651 const unsigned char *ptr;
652 if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
653 ptr = (cString += rangeLoc);
654 if (__CFStringGetEightBitStringEncoding() == encoding) {
655 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
656 if (buffer) memmove(buffer, cString, numCharsProcessed);
657 if (usedBufLen) *usedBufLen = numCharsProcessed;
658 return numCharsProcessed;
659 }
660
661 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
662 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
663 ++ptr;
664 --rangeLen;
665 }
666 numCharsProcessed = ptr - cString;
667 if (buffer) {
668 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
669 memmove(buffer, cString, numCharsProcessed);
670 buffer += numCharsProcessed;
671 max -= numCharsProcessed;
672 }
673 if (!rangeLen || (buffer && (max == 0))) {
674 if (usedBufLen) *usedBufLen = numCharsProcessed;
675 return numCharsProcessed;
676 }
677 rangeLoc += numCharsProcessed;
678 totalBytesWritten += numCharsProcessed;
679 }
680 if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
681 ptr = (cString += (rangeLoc + 1));
682 if (__CFStringGetEightBitStringEncoding() == encoding) {
683 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
684 if (buffer) memmove(buffer, cString, numCharsProcessed);
685 if (usedBufLen) *usedBufLen = numCharsProcessed;
686 return numCharsProcessed;
687 }
688 while (*ptr < 0x80 && rangeLen > 0) {
689 ++ptr;
690 --rangeLen;
691 }
692 numCharsProcessed = ptr - cString;
693 if (buffer) {
694 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
695 memmove(buffer, cString, numCharsProcessed);
696 buffer += numCharsProcessed;
697 max -= numCharsProcessed;
698 }
699 if (!rangeLen || (buffer && (max == 0))) {
700 if (usedBufLen) *usedBufLen = numCharsProcessed;
701 return numCharsProcessed;
702 }
703 rangeLoc += numCharsProcessed;
704 totalBytesWritten += numCharsProcessed;
705 }
706 }
707
708 if (!buffer) max = 0;
709
710 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
711 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
712 flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
713
714 if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
715 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
716 } else {
717 UniChar charBuf[kCFCharConversionBufferLength];
718 CFIndex currentLength;
719 CFIndex usedLen;
720 CFIndex lastUsedLen = 0, lastNumChars = 0;
721 uint32_t result;
722 uint32_t streamingMask;
723 uint32_t streamID = 0;
724 #define MAX_DECOMP_LEN (6)
725
726 while (rangeLen > 0) {
727 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
728 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
729
730 // could be in the middle of surrogate pair; back up.
731 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
732
733 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
734
735 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
736 streamID = CFStringEncodingStreamIDFromMask(result);
737 result &= ~CFStringEncodingStreamIDMask;
738
739 if (result != kCFStringEncodingConversionSuccess) {
740 if (kCFStringEncodingInvalidInputStream == result) {
741 CFRange composedRange;
742 // Check the tail
743 if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
744 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
745
746 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
747 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
748 streamID = CFStringEncodingStreamIDFromMask(result);
749 result &= ~CFStringEncodingStreamIDMask;
750 }
751 }
752
753 // Check the head
754 if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
755 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
756
757 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
758 // Try if the composed range can be converted
759 CFStringGetCharacters(string, composedRange, charBuf);
760
761 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
762 CFIndex lastRangeLoc = rangeLoc - lastNumChars;
763
764 currentLength = composedRange.location - lastRangeLoc;
765 CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
766
767 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
768 streamID = CFStringEncodingStreamIDFromMask(result);
769 result &= ~CFStringEncodingStreamIDMask;
770
771 if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
772 // Looks good. back up
773 totalBytesWritten -= lastUsedLen;
774 numCharsProcessed -= lastNumChars;
775
776 rangeLoc = lastRangeLoc;
777 rangeLen += lastNumChars;
778
779 if (max) {
780 buffer -= lastUsedLen;
781 max += lastUsedLen;
782 }
783 }
784 }
785 }
786 }
787 }
788
789 if (kCFStringEncodingConversionSuccess != result) { // really failed
790 totalBytesWritten += usedLen;
791 numCharsProcessed += numChars;
792 break;
793 }
794 }
795
796 totalBytesWritten += usedLen;
797 numCharsProcessed += numChars;
798
799 rangeLoc += numChars;
800 rangeLen -= numChars;
801 if (max) {
802 buffer += usedLen;
803 max -= usedLen;
804 if (max <= 0) break;
805 }
806 lastUsedLen = usedLen; lastNumChars = numChars;
807 flags &= ~kCFStringEncodingPrependBOM;
808 }
809 }
810 }
811 if (usedBufLen) *usedBufLen = totalBytesWritten;
812 return numCharsProcessed;
813 }
814
815 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
816 return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
817 }
818
819 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
820 CFIndex len = CFStringGetLength(string);
821 CFStringEncoding enc = CFStringGetFastestEncoding(string);
822 switch (enc) {
823 case kCFStringEncodingASCII:
824 case kCFStringEncodingMacRoman:
825 if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound; // Avoid wrap-around
826 return len * 3L + 1L;
827 default:
828 if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound; // Avoid wrap-around
829 return len * 9L + 1L;
830 }
831 }
832
833 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
834 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
835 #define MAX_STACK_BUFFER_LEN (255)
836 const UTF16Char *characters = CFStringGetCharactersPtr(string);
837 const char *origBuffer = buffer;
838 const char *bufferLimit = buffer + maxBufLen;
839 CFIndex length = CFStringGetLength(string);
840 CFIndex usedBufLen;
841
842 if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
843
844 if (NULL == characters) {
845 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
846 CFRange range = CFRangeMake(0, 0);
847 const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
848
849 if (NULL != bytes) {
850 const char *originalBytes = bytes;
851 const char *bytesLimit = bytes + length;
852
853 while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
854
855 range.location = bytes - originalBytes;
856 }
857 while ((range.location < length) && (buffer < bufferLimit)) {
858 range.length = length - range.location;
859 if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
860
861 CFStringGetCharacters(string, range, charactersBuffer);
862 if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
863
864 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
865
866 buffer += usedBufLen;
867 range.location += range.length;
868 }
869 } else {
870 if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
871 buffer += usedBufLen;
872 }
873
874 if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
875 *buffer = '\0';
876 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionLion)) {
877 while (origBuffer < buffer) if (*origBuffer++ == 0) { // There's a zero in there. Now see if the rest are all zeroes.
878 while (origBuffer < buffer) if (*origBuffer++ != 0) return false; // Embedded NULLs should cause failure: <rdar://problem/5863219>
879 }
880 }
881 return true;
882 } else {
883 return false;
884 }
885 #else
886 return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
887 #endif
888 }
889
890 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
891 return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
892 }
893
894
895 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
896
897 /* This function is used to obtain users' default script/region code.
898 The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
899 */
900 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
901 char *stringValue;
902 char buffer[__kCFMaxDefaultEncodingFileLength];
903 int uid = getuid();
904
905 if ((stringValue = (char *)__CFgetenv(__kCFUserEncodingEnvVariableName)) != NULL) {
906 if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
907 ++stringValue;
908 } else {
909 stringValue = NULL;
910 }
911 }
912
913 if ((stringValue == NULL) && ((uid > 0) || __CFgetenv("HOME"))) {
914 char passwdExtraBuf[1000 + MAXPATHLEN]; // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
915 struct passwd passwdBuf, *passwdp = NULL;
916
917 switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
918 case 0: // Success
919 break;
920 case ERANGE: // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
921 passwdp = getpwuid((uid_t)uid);
922 break;
923 default:
924 passwdp = NULL;
925 }
926 if (passwdp) {
927 char filename[MAXPATHLEN + 1];
928
929 const char *path = NULL;
930 if (!issetugid()) {
931 path = __CFgetenv("CFFIXED_USER_HOME");
932 }
933 if (!path) {
934 path = passwdp->pw_dir;
935 }
936
937 strlcpy(filename, path, sizeof(filename));
938 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
939
940 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
941 int fd = open(filename, O_RDONLY, 0);
942 if (fd == -1) {
943 // Cannot open the file. Let's fallback to smRoman/verUS
944 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
945 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
946 } else {
947 ssize_t readSize;
948 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
949 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
950 close(fd);
951 stringValue = buffer;
952
953 // Well, we already have a buffer, let's reuse it
954 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
955 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
956 }
957 if (-1 != no_hang_fd) close(no_hang_fd);
958 }
959 }
960
961 if (stringValue) {
962 *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
963 if (*stringValue == ':') {
964 if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
965 return;
966 }
967 }
968
969 // Falling back
970 *oScriptValue = 0; // smRoman
971 if (oRegionValue) *oRegionValue = 0; // verUS
972 }
973
974 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
975 char buffer[__kCFMaxDefaultEncodingFileLength];
976 char *stringValue = NULL;
977
978 *encoding = 0;
979 *region = 0;
980
981 struct passwd *passwdp = getpwuid((uid_t)0);
982 if (passwdp) {
983 const char *path = passwdp->pw_dir;
984
985 char filename[MAXPATHLEN + 1];
986 strlcpy(filename, path, sizeof(filename));
987 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
988
989 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
990 int fd = open(filename, O_RDONLY, 0);
991 if (0 <= fd) {
992 ssize_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
993 buffer[(size < 0 ? 0 : size)] = '\0';
994 close(fd);
995 stringValue = buffer;
996 }
997 if (-1 != no_hang_fd) close(no_hang_fd);
998 }
999
1000 if (stringValue) {
1001 *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
1002 if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
1003 }
1004 }
1005
1006 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1007 Boolean success = false;
1008 struct passwd *passwdp = getpwuid(getuid());
1009 if (passwdp) {
1010 const char *path = passwdp->pw_dir;
1011 if (!issetugid()) {
1012 const char *value = __CFgetenv("CFFIXED_USER_HOME");
1013 if (value) path = value; // override
1014 }
1015
1016 char filename[MAXPATHLEN + 1];
1017 strlcpy(filename, path, sizeof(filename));
1018 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1019
1020 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1021 (void)unlink(filename);
1022 int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1023 if (0 <= fd) {
1024 char buffer[__kCFMaxDefaultEncodingFileLength];
1025 size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1026 if (size <= __kCFMaxDefaultEncodingFileLength) {
1027 int ret = write(fd, buffer, size);
1028 if (size <= ret) success = true;
1029 }
1030 int save_err = errno;
1031 close(fd);
1032 errno = save_err;
1033 }
1034 int save_err = errno;
1035 if (-1 != no_hang_fd) close(no_hang_fd);
1036 errno = save_err;
1037 }
1038 return success;
1039 }
1040
1041 #endif
1042