]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodings.c
CF-550.13.tar.gz
[apple/cf.git] / CFStringEncodings.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringEncodings.c
25 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFString.h>
31 #include <CoreFoundation/CFByteOrder.h>
32 #include <CoreFoundation/CFPriv.h>
33 #include <string.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
38 #include <stdlib.h>
39 #include <fcntl.h>
40 #include <pwd.h>
41 #include <sys/param.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <stdio.h>
45 #include <xlocale.h>
46 #include <CoreFoundation/CFStringDefaultEncoding.h>
47 #endif
48
49 static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
50 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
51 if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
52 __CFWantsToUseASCIICompatibleConversion = false;
53 }
54 return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
55 }
56
57 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
58 __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
59 }
60
61 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
62
63 // To avoid early initialization issues, we just initialize this here
64 // This should not be const as it is changed
65 __private_extern__ UniChar __CFCharToUniCharTable[256] = {
66 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
67 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
68 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
69 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
70 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
71 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
72 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
73 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
74 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
75 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
76 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
77 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
78 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
79 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
80 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
81 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
82 };
83
84 __private_extern__ void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
85 if (__CFCharToUniCharFunc != func) {
86 int ch;
87 __CFCharToUniCharFunc = func;
88 if (func) {
89 for (ch = 128; ch < 256; ch++) {
90 UniChar uch;
91 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
92 }
93 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
94 for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
95 }
96 }
97 }
98
99 __private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
100 CFIndex idx;
101 for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
102 }
103
104
105 /* The minimum length the output buffers should be in the above functions
106 */
107 #define kCFCharConversionBufferLength 512
108
109
110 #define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
111 #define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))
112
/* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. A false return indicates that an error occurred during the conversion. The caller needs to free the returned buffer, in either ascii or unicode (as indicated by isASCII), if shouldFreeChars is true.
   9/18/98 __CFStringDecodeByteStream now avoids allocating a buffer if buffer->chars is not NULL
   Added useClientsMemoryPtr; if not NULL, and the provided memory can be used as is, this is set to true
   __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
   !!! converterFlags is only used for the UTF8 converter at this point
*/
119 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
120 return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
121 }
122
/* States of the kCFStringEncodingNonLossyASCII decoder.  The hex and octal
   states are consecutive values that the decoder steps through with ++mode
   until the matching "final" value is reached. */
enum {
    __NSNonLossyErrorMode = -1,     // malformed input, decoding stops
    __NSNonLossyASCIIMode = 0,      // not inside an escape sequence
    __NSNonLossyBackslashMode,      // 1: a backslash has just been read
    __NSNonLossyHexInitialMode,     // 2: expecting the first of four hex digits
    __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,      // 6: all hex digits consumed
    __NSNonLossyOctalInitialMode,   // 7: first octal digit consumed
    __NSNonLossyOctalFinalMode = __NSNonLossyOctalInitialMode + 2   // 9: all octal digits consumed
};
132
133 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
134 CFIndex idx;
135 const uint8_t *chars = (const uint8_t *)bytes;
136 const uint8_t *end = chars + len;
137 Boolean result = TRUE;
138
139 if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
140
141 buffer->isASCII = !alwaysUnicode;
142 buffer->shouldFreeChars = false;
143 buffer->numChars = 0;
144
145 if (0 == len) return true;
146
147 buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
148
149 if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
150 const UTF16Char *src = (const UTF16Char *)bytes;
151 const UTF16Char *limit = (const UTF16Char *)(bytes + len);
152 bool swap = false;
153
154 if (kCFStringEncodingUTF16 == encoding) {
155 UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
156
157 #if __CF_BIG_ENDIAN__
158 if (bom == 0xFFFE) swap = true;
159 #else
160 if (bom != 0xFEFF) swap = true;
161 #endif
162 if (bom) useClientsMemoryPtr = NULL;
163 } else {
164 #if __CF_BIG_ENDIAN__
165 if (kCFStringEncodingUTF16LE == encoding) swap = true;
166 #else
167 if (kCFStringEncodingUTF16BE == encoding) swap = true;
168 #endif
169 }
170
171 buffer->numChars = limit - src;
172
173 if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
174 *useClientsMemoryPtr = true;
175 buffer->chars.unicode = (UniChar *)src;
176 buffer->isASCII = false;
177 } else {
178 if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
179 const UTF16Char *characters = src;
180 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
181
182 while (characters < limit) {
183 if (*(characters++) & mask) {
184 buffer->isASCII = false;
185 break;
186 }
187 }
188 }
189
190 if (buffer->isASCII) {
191 uint8_t *dst;
192 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
193 if (buffer->numChars > MAX_LOCAL_CHARS) {
194 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
195 if (!buffer->chars.ascii) goto memoryErrorExit;
196 buffer->shouldFreeChars = true;
197 } else {
198 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
199 }
200 }
201 dst = buffer->chars.ascii;
202
203 if (swap) {
204 while (src < limit) *(dst++) = (*(src++) >> 8);
205 } else {
206 while (src < limit) *(dst++) = (uint8_t)*(src++);
207 }
208 } else {
209 UTF16Char *dst;
210
211 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
212 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
213 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
214 if (!buffer->chars.unicode) goto memoryErrorExit;
215 buffer->shouldFreeChars = true;
216 } else {
217 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
218 }
219 }
220 dst = buffer->chars.unicode;
221
222 if (swap) {
223 while (src < limit) *(dst++) = CFSwapInt16(*(src++));
224 } else {
225 memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
226 }
227 }
228 }
229 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
230 const UTF32Char *src = (const UTF32Char *)bytes;
231 const UTF32Char *limit = (const UTF32Char *)(bytes + len);
232 bool swap = false;
233 static bool strictUTF32 = (bool)-1;
234
235 if ((bool)-1 == strictUTF32) strictUTF32 = (_CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) != 0);
236
237 if (kCFStringEncodingUTF32 == encoding) {
238 UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
239
240 #if __CF_BIG_ENDIAN__
241 if (bom == 0xFFFE0000) swap = true;
242 #else
243 if (bom != 0x0000FEFF) swap = true;
244 #endif
245 } else {
246 #if __CF_BIG_ENDIAN__
247 if (kCFStringEncodingUTF32LE == encoding) swap = true;
248 #else
249 if (kCFStringEncodingUTF32BE == encoding) swap = true;
250 #endif
251 }
252
253 buffer->numChars = limit - src;
254
255 {
256 // Let's see if we have non-ASCII or non-BMP
257 const UTF32Char *characters = src;
258 UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
259 UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
260
261 while (characters < limit) {
262 if (*characters & asciiMask) {
263 buffer->isASCII = false;
264 if (*characters & bmpMask) {
265 if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
266 ++(buffer->numChars);
267 }
268 }
269 ++characters;
270 }
271 }
272
273 if (buffer->isASCII) {
274 uint8_t *dst;
275 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
276 if (buffer->numChars > MAX_LOCAL_CHARS) {
277 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
278 if (!buffer->chars.ascii) goto memoryErrorExit;
279 buffer->shouldFreeChars = true;
280 } else {
281 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
282 }
283 }
284 dst = buffer->chars.ascii;
285
286 if (swap) {
287 while (src < limit) *(dst++) = (*(src++) >> 24);
288 } else {
289 while (src < limit) *(dst++) = *(src++);
290 }
291 } else {
292 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
293 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
294 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
295 if (!buffer->chars.unicode) goto memoryErrorExit;
296 buffer->shouldFreeChars = true;
297 } else {
298 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
299 }
300 }
301 result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
302 }
303 } else if (kCFStringEncodingUTF8 == encoding) {
304 if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
305 chars += 3;
306 len -= 3;
307 if (0 == len) return true;
308 }
309 if (buffer->isASCII) {
310 for (idx = 0; idx < len; idx++) {
311 if (128 <= chars[idx]) {
312 buffer->isASCII = false;
313 break;
314 }
315 }
316 }
317 if (buffer->isASCII) {
318 buffer->numChars = len;
319 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
320 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
321 if (!buffer->chars.ascii) goto memoryErrorExit;
322 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
323 } else {
324 CFIndex numDone;
325 static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
326
327 if (!__CFFromUTF8) {
328 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
329 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
330 }
331
332 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
333 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
334 if (!buffer->chars.unicode) goto memoryErrorExit;
335 buffer->numChars = 0;
336 while (chars < end) {
337 numDone = 0;
338 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
339
340 if (0 == numDone) {
341 result = FALSE;
342 break;
343 }
344 buffer->numChars += numDone;
345 }
346 }
347 } else if (kCFStringEncodingNonLossyASCII == encoding) {
348 UTF16Char currentValue = 0;
349 uint8_t character;
350 int8_t mode = __NSNonLossyASCIIMode;
351
352 buffer->isASCII = false;
353 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
354 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
355 if (!buffer->chars.unicode) goto memoryErrorExit;
356 buffer->numChars = 0;
357
358 while (chars < end) {
359 character = (*chars++);
360
361 switch (mode) {
362 case __NSNonLossyASCIIMode:
363 if (character == '\\') {
364 mode = __NSNonLossyBackslashMode;
365 } else if (character < 0x80) {
366 currentValue = character;
367 } else {
368 mode = __NSNonLossyErrorMode;
369 }
370 break;
371
372 case __NSNonLossyBackslashMode:
373 if ((character == 'U') || (character == 'u')) {
374 mode = __NSNonLossyHexInitialMode;
375 currentValue = 0;
376 } else if ((character >= '0') && (character <= '9')) {
377 mode = __NSNonLossyOctalInitialMode;
378 currentValue = character - '0';
379 } else if (character == '\\') {
380 mode = __NSNonLossyASCIIMode;
381 currentValue = character;
382 } else {
383 mode = __NSNonLossyErrorMode;
384 }
385 break;
386
387 default:
388 if (mode < __NSNonLossyHexFinalMode) {
389 if ((character >= '0') && (character <= '9')) {
390 currentValue = (currentValue << 4) | (character - '0');
391 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
392 } else {
393 if (character >= 'a') character -= ('a' - 'A');
394 if ((character >= 'A') && (character <= 'F')) {
395 currentValue = (currentValue << 4) | ((character - 'A') + 10);
396 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
397 } else {
398 mode = __NSNonLossyErrorMode;
399 }
400 }
401 } else {
402 if ((character >= '0') && (character <= '9')) {
403 currentValue = (currentValue << 3) | (character - '0');
404 if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
405 } else {
406 mode = __NSNonLossyErrorMode;
407 }
408 }
409 break;
410 }
411
412 if (mode == __NSNonLossyASCIIMode) {
413 buffer->chars.unicode[buffer->numChars++] = currentValue;
414 } else if (mode == __NSNonLossyErrorMode) {
415 break;
416 }
417 }
418 result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
419 } else {
420 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
421
422 if (!converter) return false;
423
424 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
425
426 if (!isASCIISuperset) buffer->isASCII = false;
427
428 if (buffer->isASCII) {
429 for (idx = 0; idx < len; idx++) {
430 if (128 <= chars[idx]) {
431 buffer->isASCII = false;
432 break;
433 }
434 }
435 }
436
437 if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
438 if (buffer->isASCII) {
439 buffer->numChars = len;
440 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
441 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
442 if (!buffer->chars.ascii) goto memoryErrorExit;
443 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
444 } else {
445 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
446 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
447 if (!buffer->chars.unicode) goto memoryErrorExit;
448 buffer->numChars = len;
449 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
450 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
451 } else {
452 for (idx = 0; idx < len; idx++) {
453 if (chars[idx] < 0x80 && isASCIISuperset) {
454 buffer->chars.unicode[idx] = (UniChar)chars[idx];
455 } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
456 result = FALSE;
457 break;
458 }
459 }
460 }
461 }
462 } else {
463 if (buffer->isASCII) {
464 buffer->numChars = len;
465 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
466 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
467 if (!buffer->chars.ascii) goto memoryErrorExit;
468 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
469 } else {
470 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
471 static UInt32 lossyFlag = (UInt32)-1;
472
473 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
474 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
475 if (!buffer->chars.unicode) goto memoryErrorExit;
476
477 if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
478
479 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
480 }
481 }
482 }
483
484 if (FALSE == result) {
485 memoryErrorExit: // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
486 result = FALSE; // In case we come here from a goto
487 if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
488 buffer->isASCII = !alwaysUnicode;
489 buffer->shouldFreeChars = false;
490 buffer->chars.ascii = NULL;
491 buffer->numChars = 0;
492 }
493 return result;
494 }
495
496
497 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
498 into a fixed size buffer. Returns number of characters converted.
499 Characters that cannot be converted to the specified encoding are represented
500 with the char specified by lossByte; if 0, then lossy conversion is not allowed
501 and conversion stops, returning partial results.
502 Pass buffer==NULL if you don't care about the converted string (but just the convertability,
503 or number of bytes required, indicated by usedBufLen).
504 Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
505
506 Note: This function is intended to work through CFString functions, so it should work
507 with NSStrings as well as CFStrings.
508 */
509 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
510 CFIndex totalBytesWritten = 0; /* Number of written bytes */
511 CFIndex numCharsProcessed = 0; /* Number of processed chars */
512 const UniChar *unichars;
513
514 if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
515 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
516
517 if (!__CFToUTF8) {
518 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
519 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
520 }
521 numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
522
523 } else if (encoding == kCFStringEncodingNonLossyASCII) {
524 const char *hex = "0123456789abcdef";
525 UniChar ch;
526 CFStringInlineBuffer buf;
527 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
528 while (numCharsProcessed < rangeLen) {
529 CFIndex reqLength; /* Required number of chars to encode this UniChar */
530 CFIndex cnt;
531 char tmp[6];
532 ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
533 if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
534 reqLength = 1;
535 tmp[0] = (char)ch;
536 } else {
537 if (ch == '\\') {
538 tmp[1] = '\\';
539 reqLength = 2;
540 } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
541 tmp[1] = '0' + (ch >> 6);
542 tmp[2] = '0' + ((ch >> 3) & 7);
543 tmp[3] = '0' + (ch & 7);
544 reqLength = 4;
545 } else { /* \Unnnn */
546 tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
547 tmp[2] = hex[(ch >> 12) & 0x0f];
548 tmp[3] = hex[(ch >> 8) & 0x0f];
549 tmp[4] = hex[(ch >> 4) & 0x0f];
550 tmp[5] = hex[ch & 0x0f];
551 reqLength = 6;
552 }
553 tmp[0] = '\\';
554 }
555 if (buffer) {
556 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
557 .*/
558 for (cnt = 0; cnt < reqLength; cnt++) {
559 buffer[totalBytesWritten + cnt] = tmp[cnt];
560 }
561 }
562 totalBytesWritten += reqLength;
563 numCharsProcessed++;
564 }
565 } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
566 CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
567 numCharsProcessed = rangeLen;
568 if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
569 numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
570 }
571 totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
572 if (buffer) {
573 if (extraForBOM) { /* Generate BOM */
574 #if __CF_BIG_ENDIAN__
575 *buffer++ = 0xfe; *buffer++ = 0xff;
576 #else
577 *buffer++ = 0xff; *buffer++ = 0xfe;
578 #endif
579 }
580 CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
581 if ((__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
582 UTF16Char *characters = (UTF16Char *)buffer;
583 const UTF16Char *limit = characters + numCharsProcessed;
584
585 while (characters < limit) {
586 *characters = CFSwapInt16(*characters);
587 ++characters;
588 }
589 }
590 }
591 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
592 UTF32Char character;
593 CFStringInlineBuffer buf;
594 UTF32Char *characters = (UTF32Char *)buffer;
595
596 bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
597 if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
598 totalBytesWritten += sizeof(UTF32Char);
599 if (characters) {
600 if (totalBytesWritten > max) { // insufficient buffer
601 totalBytesWritten = 0;
602 } else {
603 *(characters++) = 0x0000FEFF;
604 }
605 }
606 }
607
608 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
609 while (numCharsProcessed < rangeLen) {
610 character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
611
612 if (CFUniCharIsSurrogateHighCharacter(character)) {
613 UTF16Char otherCharacter;
614
615 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
616 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
617 } else if (lossByte) {
618 character = lossByte;
619 } else {
620 break;
621 }
622 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
623 if (lossByte) {
624 character = lossByte;
625 } else {
626 break;
627 }
628 }
629
630 totalBytesWritten += sizeof(UTF32Char);
631
632 if (characters) {
633 if (totalBytesWritten > max) {
634 totalBytesWritten -= sizeof(UTF32Char);
635 break;
636 }
637 *(characters++) = (swap ? CFSwapInt32(character) : character);
638 }
639
640 numCharsProcessed += (character > 0xFFFF ? 2 : 1);
641 }
642 } else {
643 CFIndex numChars;
644 UInt32 flags;
645 const unsigned char *cString = NULL;
646 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
647
648 if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
649
650 if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
651 const unsigned char *ptr;
652 if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
653 ptr = (cString += rangeLoc);
654 if (__CFStringGetEightBitStringEncoding() == encoding) {
655 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
656 if (buffer) memmove(buffer, cString, numCharsProcessed);
657 if (usedBufLen) *usedBufLen = numCharsProcessed;
658 return numCharsProcessed;
659 }
660
661 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
662 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
663 ++ptr;
664 --rangeLen;
665 }
666 numCharsProcessed = ptr - cString;
667 if (buffer) {
668 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
669 memmove(buffer, cString, numCharsProcessed);
670 buffer += numCharsProcessed;
671 max -= numCharsProcessed;
672 }
673 if (!rangeLen || (buffer && (max == 0))) {
674 if (usedBufLen) *usedBufLen = numCharsProcessed;
675 return numCharsProcessed;
676 }
677 rangeLoc += numCharsProcessed;
678 totalBytesWritten += numCharsProcessed;
679 }
680 if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
681 ptr = (cString += (rangeLoc + 1));
682 if (__CFStringGetEightBitStringEncoding() == encoding) {
683 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
684 if (buffer) memmove(buffer, cString, numCharsProcessed);
685 if (usedBufLen) *usedBufLen = numCharsProcessed;
686 return numCharsProcessed;
687 }
688 while (*ptr < 0x80 && rangeLen > 0) {
689 ++ptr;
690 --rangeLen;
691 }
692 numCharsProcessed = ptr - cString;
693 if (buffer) {
694 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
695 memmove(buffer, cString, numCharsProcessed);
696 buffer += numCharsProcessed;
697 max -= numCharsProcessed;
698 }
699 if (!rangeLen || (buffer && (max == 0))) {
700 if (usedBufLen) *usedBufLen = numCharsProcessed;
701 return numCharsProcessed;
702 }
703 rangeLoc += numCharsProcessed;
704 totalBytesWritten += numCharsProcessed;
705 }
706 }
707
708 if (!buffer) max = 0;
709
710 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
711 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
712 flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
713
714 if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
715 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
716 } else {
717 UniChar charBuf[kCFCharConversionBufferLength];
718 CFIndex currentLength;
719 CFIndex usedLen;
720 CFIndex lastUsedLen = 0, lastNumChars = 0;
721 uint32_t result;
722 uint32_t streamingMask;
723 uint32_t streamID = 0;
724 #define MAX_DECOMP_LEN (6)
725
726 while (rangeLen > 0) {
727 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
728 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
729
730 // could be in the middle of surrogate pair; back up.
731 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
732
733 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
734
735 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
736 streamID = CFStringEncodingStreamIDFromMask(result);
737 result &= ~CFStringEncodingStreamIDMask;
738
739 if (result != kCFStringEncodingConversionSuccess) {
740 if (kCFStringEncodingInvalidInputStream == result) {
741 CFRange composedRange;
742 // Check the tail
743 if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
744 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
745
746 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
747 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
748 streamID = CFStringEncodingStreamIDFromMask(result);
749 result &= ~CFStringEncodingStreamIDMask;
750 }
751 }
752
753 // Check the head
754 if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
755 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
756
757 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
758 // Try if the composed range can be converted
759 CFStringGetCharacters(string, composedRange, charBuf);
760
761 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
762 CFIndex lastRangeLoc = rangeLoc - lastNumChars;
763
764 currentLength = composedRange.location - lastRangeLoc;
765 CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
766
767 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
768 streamID = CFStringEncodingStreamIDFromMask(result);
769 result &= ~CFStringEncodingStreamIDMask;
770
771 if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
772 // Looks good. back up
773 totalBytesWritten -= lastUsedLen;
774 numCharsProcessed -= lastNumChars;
775
776 rangeLoc = lastRangeLoc;
777 rangeLen += lastNumChars;
778
779 if (max) {
780 buffer -= lastUsedLen;
781 max += lastUsedLen;
782 }
783 }
784 }
785 }
786 }
787 }
788
789 if (kCFStringEncodingConversionSuccess != result) { // really failed
790 totalBytesWritten += usedLen;
791 numCharsProcessed += numChars;
792 break;
793 }
794 }
795
796 totalBytesWritten += usedLen;
797 numCharsProcessed += numChars;
798
799 rangeLoc += numChars;
800 rangeLen -= numChars;
801 if (max) {
802 buffer += usedLen;
803 max -= usedLen;
804 if (max <= 0) break;
805 }
806 lastUsedLen = usedLen; lastNumChars = numChars;
807 flags &= ~kCFStringEncodingPrependBOM;
808 }
809 }
810 }
811 if (usedBufLen) *usedBufLen = totalBytesWritten;
812 return numCharsProcessed;
813 }
814
815 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
816 return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
817 }
818
819 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
820 CFIndex len = CFStringGetLength(string);
821 CFStringEncoding enc = CFStringGetFastestEncoding(string);
822 switch (enc) {
823 case kCFStringEncodingASCII:
824 case kCFStringEncodingMacRoman:
825 if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound; // Avoid wrap-around
826 return len * 3L + 1L;
827 default:
828 if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound; // Avoid wrap-around
829 return len * 9L + 1L;
830 }
831 }
832
833 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
834 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
835 #define MAX_STACK_BUFFER_LEN (255)
836 const UTF16Char *characters = CFStringGetCharactersPtr(string);
837 const char *bufferLimit = buffer + maxBufLen;
838 CFIndex length = CFStringGetLength(string);
839 CFIndex usedBufLen;
840
841 if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
842
843 if (NULL == characters) {
844 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
845 CFRange range = CFRangeMake(0, 0);
846 const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
847
848 if (NULL != bytes) {
849 const char *originalBytes = bytes;
850 const char *bytesLimit = bytes + length;
851
852 while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
853
854 range.location = bytes - originalBytes;
855 }
856 while ((range.location < length) && (buffer < bufferLimit)) {
857 range.length = length - range.location;
858 if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
859
860 CFStringGetCharacters(string, range, charactersBuffer);
861 if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
862
863 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
864
865 buffer += usedBufLen;
866 range.location += range.length;
867 }
868 } else {
869 if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
870 buffer += usedBufLen;
871 }
872
873 if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
874 *buffer = '\0';
875 return true;
876 } else {
877 return false;
878 }
879 #else
880 return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
881 #endif
882 }
883
884 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
885 return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
886 }
887
888
889 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
890
891 /* This function is used to obtain users' default script/region code.
892 The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
893 */
894 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
895 char *stringValue;
896 char buffer[__kCFMaxDefaultEncodingFileLength];
897 int uid = getuid();
898
899 if ((stringValue = getenv(__kCFUserEncodingEnvVariableName)) != NULL) {
900 if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
901 ++stringValue;
902 } else {
903 stringValue = NULL;
904 }
905 }
906
907 if ((stringValue == NULL) && ((uid > 0) || getenv("HOME"))) {
908 char passwdExtraBuf[1000 + MAXPATHLEN]; // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
909 struct passwd passwdBuf, *passwdp = NULL;
910
911 switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
912 case 0: // Success
913 break;
914 case ERANGE: // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
915 passwdp = getpwuid((uid_t)uid);
916 break;
917 default:
918 passwdp = NULL;
919 }
920 if (passwdp) {
921 char filename[MAXPATHLEN + 1];
922
923 const char *path = NULL;
924 if (!issetugid()) {
925 path = getenv("CFFIXED_USER_HOME");
926 }
927 if (!path) {
928 path = passwdp->pw_dir;
929 }
930
931 strlcpy(filename, path, sizeof(filename));
932 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
933
934 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
935 int fd = open(filename, O_RDONLY, 0);
936 if (fd == -1) {
937 // Cannot open the file. Let's fallback to smRoman/verUS
938 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
939 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
940 } else {
941 int readSize;
942 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
943 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
944 close(fd);
945 stringValue = buffer;
946
947 // Well, we already have a buffer, let's reuse it
948 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
949 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
950 }
951 if (-1 != no_hang_fd) close(no_hang_fd);
952 }
953 }
954
955 if (stringValue) {
956 *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
957 if (*stringValue == ':') {
958 if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
959 return;
960 }
961 }
962
963 // Falling back
964 *oScriptValue = 0; // smRoman
965 if (oRegionValue) *oRegionValue = 0; // verUS
966 }
967
968 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
969 char buffer[__kCFMaxDefaultEncodingFileLength];
970 char *stringValue = NULL;
971
972 *encoding = 0;
973 *region = 0;
974
975 struct passwd *passwdp = getpwuid((uid_t)0);
976 if (passwdp) {
977 const char *path = passwdp->pw_dir;
978
979 char filename[MAXPATHLEN + 1];
980 strlcpy(filename, path, sizeof(filename));
981 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
982
983 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
984 int fd = open(filename, O_RDONLY, 0);
985 if (0 <= fd) {
986 size_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
987 buffer[(size < 0 ? 0 : size)] = '\0';
988 close(fd);
989 stringValue = buffer;
990 }
991 if (-1 != no_hang_fd) close(no_hang_fd);
992 }
993
994 if (stringValue) {
995 *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
996 if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
997 }
998 }
999
1000 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1001 Boolean success = false;
1002 struct passwd *passwdp = getpwuid(getuid());
1003 if (passwdp) {
1004 const char *path = passwdp->pw_dir;
1005 if (!issetugid()) {
1006 char *value = getenv("CFFIXED_USER_HOME");
1007 if (value) path = value; // override
1008 }
1009
1010 char filename[MAXPATHLEN + 1];
1011 strlcpy(filename, path, sizeof(filename));
1012 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1013
1014 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1015 (void)unlink(filename);
1016 int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1017 if (0 <= fd) {
1018 char buffer[__kCFMaxDefaultEncodingFileLength];
1019 size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1020 if (size <= __kCFMaxDefaultEncodingFileLength) {
1021 int ret = write(fd, buffer, size);
1022 if (size <= ret) success = true;
1023 }
1024 int save_err = errno;
1025 close(fd);
1026 errno = save_err;
1027 }
1028 int save_err = errno;
1029 if (-1 != no_hang_fd) close(no_hang_fd);
1030 errno = save_err;
1031 }
1032 return success;
1033 }
1034
1035 #endif
1036