]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodings.c
47fb7cf20d781fe3cc789ea5d6d8304e3ff3c81f
[apple/cf.git] / CFStringEncodings.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFStringEncodings.c
24 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFString.h>
30 #include <CoreFoundation/CFByteOrder.h>
31 #include <CoreFoundation/CFPriv.h>
32 #include <string.h>
33 #include <CoreFoundation/CFStringEncodingConverterExt.h>
34 #include <CoreFoundation/CFUniChar.h>
35 #include <CoreFoundation/CFUnicodeDecomposition.h>
36 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
37 #include <stdlib.h>
38 #include <fcntl.h>
39 #include <pwd.h>
40 #include <sys/param.h>
41 #include <unistd.h>
42 #include <string.h>
43 #include <stdio.h>
44 #include <xlocale.h>
45 #include <CoreFoundation/CFStringDefaultEncoding.h>
46 #endif
47
48 static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
49 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
50 if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
51 __CFWantsToUseASCIICompatibleConversion = false;
52 }
53 return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
54 }
55
56 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
57 __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
58 }
59
60 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
61
62 // To avoid early initialization issues, we just initialize this here
63 // This should not be const as it is changed
64 __private_extern__ UniChar __CFCharToUniCharTable[256] = {
65 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
66 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
67 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
68 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
69 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
70 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
71 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
72 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
73 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
74 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
75 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
76 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
77 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
78 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
79 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
80 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
81 };
82
83 __private_extern__ void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
84 if (__CFCharToUniCharFunc != func) {
85 int ch;
86 __CFCharToUniCharFunc = func;
87 if (func) {
88 for (ch = 128; ch < 256; ch++) {
89 UniChar uch;
90 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
91 }
92 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
93 for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
94 }
95 }
96 }
97
98 __private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
99 CFIndex idx;
100 for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
101 }
102
103
104 /* The minimum length the output buffers should be in the above functions
105 */
106 #define kCFCharConversionBufferLength 512
107
108
109 #define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
110 #define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))
111
112 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
113 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
114 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
115 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
116 !!! converterFlags is only used for the UTF8 converter at this point
117 */
118 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
119 return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
120 }
121
/* States of the kCFStringEncodingNonLossyASCII decoder.
   Hex escapes step through 4 states (one per digit) starting at HexInitial; reaching HexFinal completes the escape.
   Octal escapes enter OctalInitial once the first digit has been read and complete on reaching OctalFinal.
*/
enum {
    __NSNonLossyErrorMode = -1,
    __NSNonLossyASCIIMode,          /* 0: plain character expected */
    __NSNonLossyBackslashMode,      /* 1: just saw a backslash */
    __NSNonLossyHexInitialMode,     /* 2 */
    __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,
    __NSNonLossyOctalInitialMode,   /* HexFinal + 1 */
    __NSNonLossyOctalFinalMode = __NSNonLossyOctalInitialMode + 2
};
131
132 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
133 CFIndex idx;
134 const uint8_t *chars = (const uint8_t *)bytes;
135 const uint8_t *end = chars + len;
136 Boolean result = TRUE;
137
138 if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
139
140 buffer->isASCII = !alwaysUnicode;
141 buffer->shouldFreeChars = false;
142 buffer->numChars = 0;
143
144 if (0 == len) return true;
145
146 buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
147
148 if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
149 const UTF16Char *src = (const UTF16Char *)bytes;
150 const UTF16Char *limit = (const UTF16Char *)(bytes + len);
151 bool swap = false;
152
153 if (kCFStringEncodingUTF16 == encoding) {
154 UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
155
156 #if __CF_BIG_ENDIAN__
157 if (bom == 0xFFFE) swap = true;
158 #else
159 if (bom != 0xFEFF) swap = true;
160 #endif
161 if (bom) useClientsMemoryPtr = NULL;
162 } else {
163 #if __CF_BIG_ENDIAN__
164 if (kCFStringEncodingUTF16LE == encoding) swap = true;
165 #else
166 if (kCFStringEncodingUTF16BE == encoding) swap = true;
167 #endif
168 }
169
170 buffer->numChars = limit - src;
171
172 if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
173 *useClientsMemoryPtr = true;
174 buffer->chars.unicode = (UniChar *)src;
175 buffer->isASCII = false;
176 } else {
177 if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
178 const UTF16Char *characters = src;
179 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
180
181 while (characters < limit) {
182 if (*(characters++) & mask) {
183 buffer->isASCII = false;
184 break;
185 }
186 }
187 }
188
189 if (buffer->isASCII) {
190 uint8_t *dst;
191 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
192 if (buffer->numChars > MAX_LOCAL_CHARS) {
193 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
194 if (!buffer->chars.ascii) goto memoryErrorExit;
195 buffer->shouldFreeChars = true;
196 } else {
197 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
198 }
199 }
200 dst = buffer->chars.ascii;
201
202 if (swap) {
203 while (src < limit) *(dst++) = (*(src++) >> 8);
204 } else {
205 while (src < limit) *(dst++) = (uint8_t)*(src++);
206 }
207 } else {
208 UTF16Char *dst;
209
210 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
211 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
212 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
213 if (!buffer->chars.unicode) goto memoryErrorExit;
214 buffer->shouldFreeChars = true;
215 } else {
216 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
217 }
218 }
219 dst = buffer->chars.unicode;
220
221 if (swap) {
222 while (src < limit) *(dst++) = CFSwapInt16(*(src++));
223 } else {
224 memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
225 }
226 }
227 }
228 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
229 const UTF32Char *src = (const UTF32Char *)bytes;
230 const UTF32Char *limit = (const UTF32Char *)(bytes + len);
231 bool swap = false;
232 static bool strictUTF32 = (bool)-1;
233
234 if ((bool)-1 == strictUTF32) strictUTF32 = (_CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) != 0);
235
236 if (kCFStringEncodingUTF32 == encoding) {
237 UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
238
239 #if __CF_BIG_ENDIAN__
240 if (bom == 0xFFFE0000) swap = true;
241 #else
242 if (bom != 0x0000FEFF) swap = true;
243 #endif
244 } else {
245 #if __CF_BIG_ENDIAN__
246 if (kCFStringEncodingUTF32LE == encoding) swap = true;
247 #else
248 if (kCFStringEncodingUTF32BE == encoding) swap = true;
249 #endif
250 }
251
252 buffer->numChars = limit - src;
253
254 {
255 // Let's see if we have non-ASCII or non-BMP
256 const UTF32Char *characters = src;
257 UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
258 UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
259
260 while (characters < limit) {
261 if (*characters & asciiMask) {
262 buffer->isASCII = false;
263 if (*characters & bmpMask) {
264 if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
265 ++(buffer->numChars);
266 }
267 }
268 ++characters;
269 }
270 }
271
272 if (buffer->isASCII) {
273 uint8_t *dst;
274 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
275 if (buffer->numChars > MAX_LOCAL_CHARS) {
276 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
277 if (!buffer->chars.ascii) goto memoryErrorExit;
278 buffer->shouldFreeChars = true;
279 } else {
280 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
281 }
282 }
283 dst = buffer->chars.ascii;
284
285 if (swap) {
286 while (src < limit) *(dst++) = (*(src++) >> 24);
287 } else {
288 while (src < limit) *(dst++) = *(src++);
289 }
290 } else {
291 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
292 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
293 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
294 if (!buffer->chars.unicode) goto memoryErrorExit;
295 buffer->shouldFreeChars = true;
296 } else {
297 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
298 }
299 }
300 result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
301 }
302 } else if (kCFStringEncodingUTF8 == encoding) {
303 if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
304 chars += 3;
305 len -= 3;
306 if (0 == len) return true;
307 }
308 if (buffer->isASCII) {
309 for (idx = 0; idx < len; idx++) {
310 if (128 <= chars[idx]) {
311 buffer->isASCII = false;
312 break;
313 }
314 }
315 }
316 if (buffer->isASCII) {
317 buffer->numChars = len;
318 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
319 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
320 if (!buffer->chars.ascii) goto memoryErrorExit;
321 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
322 } else {
323 CFIndex numDone;
324 static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
325
326 if (!__CFFromUTF8) {
327 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
328 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
329 }
330
331 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
332 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
333 if (!buffer->chars.unicode) goto memoryErrorExit;
334 buffer->numChars = 0;
335 while (chars < end) {
336 numDone = 0;
337 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
338
339 if (0 == numDone) {
340 result = FALSE;
341 break;
342 }
343 buffer->numChars += numDone;
344 }
345 }
346 } else if (kCFStringEncodingNonLossyASCII == encoding) {
347 UTF16Char currentValue = 0;
348 uint8_t character;
349 int8_t mode = __NSNonLossyASCIIMode;
350
351 buffer->isASCII = false;
352 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
353 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
354 if (!buffer->chars.unicode) goto memoryErrorExit;
355 buffer->numChars = 0;
356
357 while (chars < end) {
358 character = (*chars++);
359
360 switch (mode) {
361 case __NSNonLossyASCIIMode:
362 if (character == '\\') {
363 mode = __NSNonLossyBackslashMode;
364 } else if (character < 0x80) {
365 currentValue = character;
366 } else {
367 mode = __NSNonLossyErrorMode;
368 }
369 break;
370
371 case __NSNonLossyBackslashMode:
372 if ((character == 'U') || (character == 'u')) {
373 mode = __NSNonLossyHexInitialMode;
374 currentValue = 0;
375 } else if ((character >= '0') && (character <= '9')) {
376 mode = __NSNonLossyOctalInitialMode;
377 currentValue = character - '0';
378 } else if (character == '\\') {
379 mode = __NSNonLossyASCIIMode;
380 currentValue = character;
381 } else {
382 mode = __NSNonLossyErrorMode;
383 }
384 break;
385
386 default:
387 if (mode < __NSNonLossyHexFinalMode) {
388 if ((character >= '0') && (character <= '9')) {
389 currentValue = (currentValue << 4) | (character - '0');
390 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
391 } else {
392 if (character >= 'a') character -= ('a' - 'A');
393 if ((character >= 'A') && (character <= 'F')) {
394 currentValue = (currentValue << 4) | ((character - 'A') + 10);
395 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
396 } else {
397 mode = __NSNonLossyErrorMode;
398 }
399 }
400 } else {
401 if ((character >= '0') && (character <= '9')) {
402 currentValue = (currentValue << 3) | (character - '0');
403 if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
404 } else {
405 mode = __NSNonLossyErrorMode;
406 }
407 }
408 break;
409 }
410
411 if (mode == __NSNonLossyASCIIMode) {
412 buffer->chars.unicode[buffer->numChars++] = currentValue;
413 } else if (mode == __NSNonLossyErrorMode) {
414 break;
415 }
416 }
417 result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
418 } else {
419 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
420
421 if (!converter) return false;
422
423 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
424
425 if (!isASCIISuperset) buffer->isASCII = false;
426
427 if (buffer->isASCII) {
428 for (idx = 0; idx < len; idx++) {
429 if (128 <= chars[idx]) {
430 buffer->isASCII = false;
431 break;
432 }
433 }
434 }
435
436 if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
437 if (buffer->isASCII) {
438 buffer->numChars = len;
439 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
440 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
441 if (!buffer->chars.ascii) goto memoryErrorExit;
442 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
443 } else {
444 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
445 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
446 if (!buffer->chars.unicode) goto memoryErrorExit;
447 buffer->numChars = len;
448 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
449 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
450 } else {
451 for (idx = 0; idx < len; idx++) {
452 if (chars[idx] < 0x80 && isASCIISuperset) {
453 buffer->chars.unicode[idx] = (UniChar)chars[idx];
454 } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
455 result = FALSE;
456 break;
457 }
458 }
459 }
460 }
461 } else {
462 if (buffer->isASCII) {
463 buffer->numChars = len;
464 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
465 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
466 if (!buffer->chars.ascii) goto memoryErrorExit;
467 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
468 } else {
469 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
470 static UInt32 lossyFlag = (UInt32)-1;
471
472 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
473 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
474 if (!buffer->chars.unicode) goto memoryErrorExit;
475
476 if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
477
478 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
479 }
480 }
481 }
482
483 if (FALSE == result) {
484 memoryErrorExit: // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
485 result = FALSE; // In case we come here from a goto
486 if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
487 buffer->isASCII = !alwaysUnicode;
488 buffer->shouldFreeChars = false;
489 buffer->chars.ascii = NULL;
490 buffer->numChars = 0;
491 }
492 return result;
493 }
494
495
496 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
497 into a fixed size buffer. Returns number of characters converted.
498 Characters that cannot be converted to the specified encoding are represented
499 with the char specified by lossByte; if 0, then lossy conversion is not allowed
500 and conversion stops, returning partial results.
501 Pass buffer==NULL if you don't care about the converted string (but just the convertability,
502 or number of bytes required, indicated by usedBufLen).
503 Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
504
505 Note: This function is intended to work through CFString functions, so it should work
506 with NSStrings as well as CFStrings.
507 */
508 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
509 CFIndex totalBytesWritten = 0; /* Number of written bytes */
510 CFIndex numCharsProcessed = 0; /* Number of processed chars */
511 const UniChar *unichars;
512
513 if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
514 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
515
516 if (!__CFToUTF8) {
517 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
518 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
519 }
520 numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
521
522 } else if (encoding == kCFStringEncodingNonLossyASCII) {
523 const char *hex = "0123456789abcdef";
524 UniChar ch;
525 CFStringInlineBuffer buf;
526 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
527 while (numCharsProcessed < rangeLen) {
528 CFIndex reqLength; /* Required number of chars to encode this UniChar */
529 CFIndex cnt;
530 char tmp[6];
531 ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
532 if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
533 reqLength = 1;
534 tmp[0] = (char)ch;
535 } else {
536 if (ch == '\\') {
537 tmp[1] = '\\';
538 reqLength = 2;
539 } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
540 tmp[1] = '0' + (ch >> 6);
541 tmp[2] = '0' + ((ch >> 3) & 7);
542 tmp[3] = '0' + (ch & 7);
543 reqLength = 4;
544 } else { /* \Unnnn */
545 tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
546 tmp[2] = hex[(ch >> 12) & 0x0f];
547 tmp[3] = hex[(ch >> 8) & 0x0f];
548 tmp[4] = hex[(ch >> 4) & 0x0f];
549 tmp[5] = hex[ch & 0x0f];
550 reqLength = 6;
551 }
552 tmp[0] = '\\';
553 }
554 if (buffer) {
555 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
556 .*/
557 for (cnt = 0; cnt < reqLength; cnt++) {
558 buffer[totalBytesWritten + cnt] = tmp[cnt];
559 }
560 }
561 totalBytesWritten += reqLength;
562 numCharsProcessed++;
563 }
564 } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
565 CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
566 numCharsProcessed = rangeLen;
567 if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
568 numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
569 }
570 totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
571 if (buffer) {
572 if (extraForBOM) { /* Generate BOM */
573 #if __CF_BIG_ENDIAN__
574 *buffer++ = 0xfe; *buffer++ = 0xff;
575 #else
576 *buffer++ = 0xff; *buffer++ = 0xfe;
577 #endif
578 }
579 CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
580 if ((__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
581 UTF16Char *characters = (UTF16Char *)buffer;
582 const UTF16Char *limit = characters + numCharsProcessed;
583
584 while (characters < limit) {
585 *characters = CFSwapInt16(*characters);
586 ++characters;
587 }
588 }
589 }
590 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
591 UTF32Char character;
592 CFStringInlineBuffer buf;
593 UTF32Char *characters = (UTF32Char *)buffer;
594
595 bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
596 if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
597 totalBytesWritten += sizeof(UTF32Char);
598 if (characters) {
599 if (totalBytesWritten > max) { // insufficient buffer
600 totalBytesWritten = 0;
601 } else {
602 *(characters++) = 0x0000FEFF;
603 }
604 }
605 }
606
607 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
608 while (numCharsProcessed < rangeLen) {
609 character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
610
611 if (CFUniCharIsSurrogateHighCharacter(character)) {
612 UTF16Char otherCharacter;
613
614 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
615 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
616 } else if (lossByte) {
617 character = lossByte;
618 } else {
619 break;
620 }
621 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
622 if (lossByte) {
623 character = lossByte;
624 } else {
625 break;
626 }
627 }
628
629 totalBytesWritten += sizeof(UTF32Char);
630
631 if (characters) {
632 if (totalBytesWritten > max) {
633 totalBytesWritten -= sizeof(UTF32Char);
634 break;
635 }
636 *(characters++) = (swap ? CFSwapInt32(character) : character);
637 }
638
639 numCharsProcessed += (character > 0xFFFF ? 2 : 1);
640 }
641 } else {
642 CFIndex numChars;
643 UInt32 flags;
644 const unsigned char *cString = NULL;
645 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
646
647 if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
648
649 if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
650 const unsigned char *ptr;
651 if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
652 ptr = (cString += rangeLoc);
653 if (__CFStringGetEightBitStringEncoding() == encoding) {
654 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
655 if (buffer) memmove(buffer, cString, numCharsProcessed);
656 if (usedBufLen) *usedBufLen = numCharsProcessed;
657 return numCharsProcessed;
658 }
659
660 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
661 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
662 ++ptr;
663 --rangeLen;
664 }
665 numCharsProcessed = ptr - cString;
666 if (buffer) {
667 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
668 memmove(buffer, cString, numCharsProcessed);
669 buffer += numCharsProcessed;
670 max -= numCharsProcessed;
671 }
672 if (!rangeLen || (buffer && (max == 0))) {
673 if (usedBufLen) *usedBufLen = numCharsProcessed;
674 return numCharsProcessed;
675 }
676 rangeLoc += numCharsProcessed;
677 totalBytesWritten += numCharsProcessed;
678 }
679 if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
680 ptr = (cString += (rangeLoc + 1));
681 if (__CFStringGetEightBitStringEncoding() == encoding) {
682 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
683 if (buffer) memmove(buffer, cString, numCharsProcessed);
684 if (usedBufLen) *usedBufLen = numCharsProcessed;
685 return numCharsProcessed;
686 }
687 while (*ptr < 0x80 && rangeLen > 0) {
688 ++ptr;
689 --rangeLen;
690 }
691 numCharsProcessed = ptr - cString;
692 if (buffer) {
693 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
694 memmove(buffer, cString, numCharsProcessed);
695 buffer += numCharsProcessed;
696 max -= numCharsProcessed;
697 }
698 if (!rangeLen || (buffer && (max == 0))) {
699 if (usedBufLen) *usedBufLen = numCharsProcessed;
700 return numCharsProcessed;
701 }
702 rangeLoc += numCharsProcessed;
703 totalBytesWritten += numCharsProcessed;
704 }
705 }
706
707 if (!buffer) max = 0;
708
709 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
710 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
711 flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
712
713 if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
714 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
715 } else {
716 UniChar charBuf[kCFCharConversionBufferLength];
717 CFIndex currentLength;
718 CFIndex usedLen;
719 CFIndex lastUsedLen = 0, lastNumChars = 0;
720 uint32_t result;
721 uint32_t streamingMask;
722 uint32_t streamID = 0;
723 #define MAX_DECOMP_LEN (6)
724
725 while (rangeLen > 0) {
726 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
727 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
728
729 // could be in the middle of surrogate pair; back up.
730 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
731
732 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
733
734 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
735 streamID = CFStringEncodingStreamIDFromMask(result);
736 result &= ~CFStringEncodingStreamIDMask;
737
738 if (result != kCFStringEncodingConversionSuccess) {
739 if (kCFStringEncodingInvalidInputStream == result) {
740 CFRange composedRange;
741 // Check the tail
742 if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
743 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
744
745 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
746 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
747 streamID = CFStringEncodingStreamIDFromMask(result);
748 result &= ~CFStringEncodingStreamIDMask;
749 }
750 }
751
752 // Check the head
753 if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
754 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
755
756 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
757 // Try if the composed range can be converted
758 CFStringGetCharacters(string, composedRange, charBuf);
759
760 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
761 CFIndex lastRangeLoc = rangeLoc - lastNumChars;
762
763 currentLength = composedRange.location - lastRangeLoc;
764 CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
765
766 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
767 streamID = CFStringEncodingStreamIDFromMask(result);
768 result &= ~CFStringEncodingStreamIDMask;
769
770 if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
771 // Looks good. back up
772 totalBytesWritten -= lastUsedLen;
773 numCharsProcessed -= lastNumChars;
774
775 rangeLoc = lastRangeLoc;
776 rangeLen += lastNumChars;
777
778 if (max) {
779 buffer -= lastUsedLen;
780 max += lastUsedLen;
781 }
782 }
783 }
784 }
785 }
786 }
787
788 if (kCFStringEncodingConversionSuccess != result) { // really failed
789 totalBytesWritten += usedLen;
790 numCharsProcessed += numChars;
791 break;
792 }
793 }
794
795 totalBytesWritten += usedLen;
796 numCharsProcessed += numChars;
797
798 rangeLoc += numChars;
799 rangeLen -= numChars;
800 if (max) {
801 buffer += usedLen;
802 max -= usedLen;
803 if (max <= 0) break;
804 }
805 lastUsedLen = usedLen; lastNumChars = numChars;
806 flags &= ~kCFStringEncodingPrependBOM;
807 }
808 }
809 }
810 if (usedBufLen) *usedBufLen = totalBytesWritten;
811 return numCharsProcessed;
812 }
813
814 CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
815 return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
816 }
817
818 CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
819 CFIndex len = CFStringGetLength(string);
820 CFStringEncoding enc = CFStringGetFastestEncoding(string);
821 switch (enc) {
822 case kCFStringEncodingASCII:
823 case kCFStringEncodingMacRoman:
824 if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound; // Avoid wrap-around
825 return len * 3L + 1L;
826 default:
827 if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound; // Avoid wrap-around
828 return len * 9L + 1L;
829 }
830 }
831
832 Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
833 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
834 #define MAX_STACK_BUFFER_LEN (255)
835 const UTF16Char *characters = CFStringGetCharactersPtr(string);
836 const char *bufferLimit = buffer + maxBufLen;
837 CFIndex length = CFStringGetLength(string);
838 CFIndex usedBufLen;
839
840 if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
841
842 if (NULL == characters) {
843 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
844 CFRange range = CFRangeMake(0, 0);
845 const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
846
847 if (NULL != bytes) {
848 const char *originalBytes = bytes;
849 const char *bytesLimit = bytes + length;
850
851 while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
852
853 range.location = bytes - originalBytes;
854 }
855 while ((range.location < length) && (buffer < bufferLimit)) {
856 range.length = length - range.location;
857 if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
858
859 CFStringGetCharacters(string, range, charactersBuffer);
860 if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
861
862 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
863
864 buffer += usedBufLen;
865 range.location += range.length;
866 }
867 } else {
868 if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
869 buffer += usedBufLen;
870 }
871
872 if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
873 *buffer = '\0';
874 return true;
875 } else {
876 return false;
877 }
878 #else
879 return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
880 #endif
881 }
882
883 Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
884 return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
885 }
886
887
888 #if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
889
890 /* This function is used to obtain users' default script/region code.
891 The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
892 */
893 void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
894 char *stringValue;
895 char buffer[__kCFMaxDefaultEncodingFileLength];
896 int uid = getuid();
897
898 if ((stringValue = getenv(__kCFUserEncodingEnvVariableName)) != NULL) {
899 if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
900 ++stringValue;
901 } else {
902 stringValue = NULL;
903 }
904 }
905
906 if ((stringValue == NULL) && ((uid > 0) || getenv("HOME"))) {
907 char passwdExtraBuf[1000 + MAXPATHLEN]; // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
908 struct passwd passwdBuf, *passwdp = NULL;
909
910 switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
911 case 0: // Success
912 break;
913 case ERANGE: // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
914 passwdp = getpwuid((uid_t)uid);
915 break;
916 default:
917 passwdp = NULL;
918 }
919 if (passwdp) {
920 char filename[MAXPATHLEN + 1];
921
922 const char *path = NULL;
923 if (!issetugid()) {
924 path = getenv("CFFIXED_USER_HOME");
925 }
926 if (!path) {
927 path = passwdp->pw_dir;
928 }
929
930 strlcpy(filename, path, sizeof(filename));
931 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
932
933 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
934 int fd = open(filename, O_RDONLY, 0);
935 if (fd == -1) {
936 // Cannot open the file. Let's fallback to smRoman/verUS
937 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
938 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
939 } else {
940 int readSize;
941 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
942 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
943 close(fd);
944 stringValue = buffer;
945
946 // Well, we already have a buffer, let's reuse it
947 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
948 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
949 }
950 if (-1 != no_hang_fd) close(no_hang_fd);
951 }
952 }
953
954 if (stringValue) {
955 *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
956 if (*stringValue == ':') {
957 if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
958 return;
959 }
960 }
961
962 // Falling back
963 *oScriptValue = 0; // smRoman
964 if (oRegionValue) *oRegionValue = 0; // verUS
965 }
966
967 void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
968 char buffer[__kCFMaxDefaultEncodingFileLength];
969 char *stringValue = NULL;
970
971 *encoding = 0;
972 *region = 0;
973
974 struct passwd *passwdp = getpwuid((uid_t)0);
975 if (passwdp) {
976 const char *path = passwdp->pw_dir;
977
978 char filename[MAXPATHLEN + 1];
979 strlcpy(filename, path, sizeof(filename));
980 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
981
982 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
983 int fd = open(filename, O_RDONLY, 0);
984 if (0 <= fd) {
985 size_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
986 buffer[(size < 0 ? 0 : size)] = '\0';
987 close(fd);
988 stringValue = buffer;
989 }
990 if (-1 != no_hang_fd) close(no_hang_fd);
991 }
992
993 if (stringValue) {
994 *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
995 if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
996 }
997 }
998
999 Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1000 Boolean success = false;
1001 struct passwd *passwdp = getpwuid(getuid());
1002 if (passwdp) {
1003 const char *path = passwdp->pw_dir;
1004 if (!issetugid()) {
1005 char *value = getenv("CFFIXED_USER_HOME");
1006 if (value) path = value; // override
1007 }
1008
1009 char filename[MAXPATHLEN + 1];
1010 strlcpy(filename, path, sizeof(filename));
1011 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1012
1013 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1014 (void)unlink(filename);
1015 int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1016 if (0 <= fd) {
1017 char buffer[__kCFMaxDefaultEncodingFileLength];
1018 size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1019 if (size <= __kCFMaxDefaultEncodingFileLength) {
1020 int ret = write(fd, buffer, size);
1021 if (size <= ret) success = true;
1022 }
1023 int save_err = errno;
1024 close(fd);
1025 errno = save_err;
1026 }
1027 int save_err = errno;
1028 if (-1 != no_hang_fd) close(no_hang_fd);
1029 errno = save_err;
1030 }
1031 return success;
1032 }
1033
1034 #endif
1035