]> git.saurik.com Git - apple/cf.git/blame - CFStringEncodings.c
CF-1153.18.tar.gz
[apple/cf.git] / CFStringEncodings.c
CommitLineData
9ce05555 1/*
e29e285d 2 * Copyright (c) 2015 Apple Inc. All rights reserved.
9ce05555
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
d7384798 5 *
9ce05555
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
d7384798 12 *
9ce05555
A
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
d7384798 20 *
9ce05555
A
21 * @APPLE_LICENSE_HEADER_END@
22 */
f64f9b69 23
9ce05555 24/* CFStringEncodings.c
d7384798 25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
9ce05555
A
26 Responsibility: Aki Inoue
27*/
28
29#include "CFInternal.h"
30#include <CoreFoundation/CFString.h>
31#include <CoreFoundation/CFByteOrder.h>
cf7d2af9 32#include <CoreFoundation/CFPriv.h>
9ce05555 33#include <string.h>
cf7d2af9
A
34#include <CoreFoundation/CFStringEncodingConverterExt.h>
35#include <CoreFoundation/CFUniChar.h>
36#include <CoreFoundation/CFUnicodeDecomposition.h>
37#if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
38#include <stdlib.h>
39#include <fcntl.h>
40#include <pwd.h>
41#include <sys/param.h>
42#include <unistd.h>
43#include <string.h>
44#include <stdio.h>
45#include <xlocale.h>
46#include <CoreFoundation/CFStringDefaultEncoding.h>
47#endif
9ce05555 48
856091c5
A
49static bool __CFWantsToUseASCIICompatibleConversion = false;
50CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) { return __CFWantsToUseASCIICompatibleConversion; }
9ce05555
A
51
52void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
53 __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
54}
55
56Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
57
58// To avoid early initialization issues, we just initialize this here
59// This should not be const as it is changed
a48904a4 60CF_PRIVATE UniChar __CFCharToUniCharTable[256] = {
9ce05555
A
61 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
62 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
63 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
64 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
65 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
66 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
67 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
68112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
69128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
70144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
71160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
72176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
73192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
74208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
75224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
76240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
77};
78
a48904a4 79CF_PRIVATE void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
9ce05555
A
80 if (__CFCharToUniCharFunc != func) {
81 int ch;
82 __CFCharToUniCharFunc = func;
83 if (func) {
84 for (ch = 128; ch < 256; ch++) {
85 UniChar uch;
86 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
87 }
88 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
89 for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
90 }
91 }
92}
93
a48904a4 94CF_PRIVATE void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
9ce05555
A
95 CFIndex idx;
96 for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
97}
98
99
/* The minimum length the output buffers should be in the above functions.
   Also used in this file as the element count of the stack UniChar scratch buffer in __CFStringEncodeByteStream().
*/
#define kCFCharConversionBufferLength 512


/* Capacity of a CFVarWidthCharBuffer's inline storage, counted in 8-bit characters and in UniChars respectively.
   Both macros expand to an expression on a variable named 'buffer', so they can only be used where such a pointer is in scope.
*/
#define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))
107
9ce05555
A
108/* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
1099/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
110Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
111__CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
112!!! converterFlags is only used for the UTF8 converter at this point
113*/
114Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
115 return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
116}
117
/* States of the kCFStringEncodingNonLossyASCII decoder in __CFStringDecodeByteStream3().
   While reading the digits of an escape the state is incremented once per digit, so the
   distance between an "Initial" and the matching "Final" value is the number of digits still expected. */
enum {
    __NSNonLossyErrorMode = -1,                                         // malformed input; decoding stops
    __NSNonLossyASCIIMode = 0,                                          // outside of any escape sequence
    __NSNonLossyBackslashMode,                                          // 1: just consumed a backslash
    __NSNonLossyHexInitialMode,                                         // 2: expecting the first of four hex digits
    __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,          // 6: all four hex digits consumed
    __NSNonLossyOctalInitialMode,                                       // 7: first octal digit consumed
    __NSNonLossyOctalFinalMode = __NSNonLossyOctalInitialMode + 2       // 9: all three octal digits consumed
};
127
bd5b749c 128Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
cf7d2af9
A
129 CFIndex idx;
130 const uint8_t *chars = (const uint8_t *)bytes;
131 const uint8_t *end = chars + len;
132 Boolean result = TRUE;
9ce05555
A
133
134 if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
135
136 buffer->isASCII = !alwaysUnicode;
137 buffer->shouldFreeChars = false;
138 buffer->numChars = 0;
d8925383 139
9ce05555
A
140 if (0 == len) return true;
141
142 buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
d8925383
A
143
144 if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
145 const UTF16Char *src = (const UTF16Char *)bytes;
8ca704e1 146 const UTF16Char *limit = src + (len / sizeof(UTF16Char)); // <rdar://problem/7854378> avoiding odd len issue
d8925383
A
147 bool swap = false;
148
149 if (kCFStringEncodingUTF16 == encoding) {
150 UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
151
bd5b749c 152#if __CF_BIG_ENDIAN__
d8925383
A
153 if (bom == 0xFFFE) swap = true;
154#else
155 if (bom != 0xFEFF) swap = true;
156#endif
157 if (bom) useClientsMemoryPtr = NULL;
158 } else {
bd5b749c 159#if __CF_BIG_ENDIAN__
d8925383
A
160 if (kCFStringEncodingUTF16LE == encoding) swap = true;
161#else
162 if (kCFStringEncodingUTF16BE == encoding) swap = true;
163#endif
9ce05555
A
164 }
165
d8925383
A
166 buffer->numChars = limit - src;
167
168 if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
169 *useClientsMemoryPtr = true;
170 buffer->chars.unicode = (UniChar *)src;
171 buffer->isASCII = false;
9ce05555 172 } else {
d8925383
A
173 if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
174 const UTF16Char *characters = src;
175 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
176
177 while (characters < limit) {
178 if (*(characters++) & mask) {
179 buffer->isASCII = false;
180 break;
181 }
9ce05555
A
182 }
183 }
d8925383
A
184
185 if (buffer->isASCII) {
186 uint8_t *dst;
187 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
188 if (buffer->numChars > MAX_LOCAL_CHARS) {
bd5b749c 189 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
cf7d2af9 190 if (!buffer->chars.ascii) goto memoryErrorExit;
d8925383
A
191 buffer->shouldFreeChars = true;
192 } else {
193 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
194 }
9ce05555 195 }
d8925383
A
196 dst = buffer->chars.ascii;
197
198 if (swap) {
199 while (src < limit) *(dst++) = (*(src++) >> 8);
9ce05555 200 } else {
bd5b749c 201 while (src < limit) *(dst++) = (uint8_t)*(src++);
9ce05555 202 }
d8925383
A
203 } else {
204 UTF16Char *dst;
9ce05555 205
d8925383
A
206 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
207 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
bd5b749c 208 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
cf7d2af9 209 if (!buffer->chars.unicode) goto memoryErrorExit;
d8925383 210 buffer->shouldFreeChars = true;
9ce05555 211 } else {
d8925383 212 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
9ce05555 213 }
d8925383
A
214 }
215 dst = buffer->chars.unicode;
216
217 if (swap) {
218 while (src < limit) *(dst++) = CFSwapInt16(*(src++));
9ce05555 219 } else {
d8925383 220 memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
9ce05555 221 }
9ce05555 222 }
d8925383
A
223 }
224 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
225 const UTF32Char *src = (const UTF32Char *)bytes;
8ca704e1 226 const UTF32Char *limit = src + (len / sizeof(UTF32Char)); // <rdar://problem/7854378> avoiding odd len issue
d8925383 227 bool swap = false;
bd5b749c
A
228 static bool strictUTF32 = (bool)-1;
229
856091c5 230 if ((bool)-1 == strictUTF32) strictUTF32 = (1 != 0);
9ce05555 231
d8925383
A
232 if (kCFStringEncodingUTF32 == encoding) {
233 UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
9ce05555 234
bd5b749c 235#if __CF_BIG_ENDIAN__
d8925383
A
236 if (bom == 0xFFFE0000) swap = true;
237#else
238 if (bom != 0x0000FEFF) swap = true;
239#endif
240 } else {
bd5b749c 241#if __CF_BIG_ENDIAN__
d8925383
A
242 if (kCFStringEncodingUTF32LE == encoding) swap = true;
243#else
244 if (kCFStringEncodingUTF32BE == encoding) swap = true;
245#endif
9ce05555 246 }
d8925383
A
247
248 buffer->numChars = limit - src;
249
250 {
251 // Let's see if we have non-ASCII or non-BMP
252 const UTF32Char *characters = src;
253 UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
254 UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
255
256 while (characters < limit) {
257 if (*characters & asciiMask) {
258 buffer->isASCII = false;
bd5b749c 259 if (*characters & bmpMask) {
cf7d2af9 260 if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
bd5b749c
A
261 ++(buffer->numChars);
262 }
9ce05555 263 }
d8925383 264 ++characters;
9ce05555
A
265 }
266 }
d8925383
A
267
268 if (buffer->isASCII) {
269 uint8_t *dst;
270 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
271 if (buffer->numChars > MAX_LOCAL_CHARS) {
bd5b749c 272 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
cf7d2af9 273 if (!buffer->chars.ascii) goto memoryErrorExit;
d8925383
A
274 buffer->shouldFreeChars = true;
275 } else {
276 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
277 }
9ce05555 278 }
d8925383 279 dst = buffer->chars.ascii;
9ce05555 280
d8925383
A
281 if (swap) {
282 while (src < limit) *(dst++) = (*(src++) >> 24);
283 } else {
284 while (src < limit) *(dst++) = *(src++);
285 }
286 } else {
287 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
288 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
bd5b749c 289 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
cf7d2af9 290 if (!buffer->chars.unicode) goto memoryErrorExit;
d8925383
A
291 buffer->shouldFreeChars = true;
292 } else {
293 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
294 }
295 }
cf7d2af9 296 result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
d8925383 297 }
cf7d2af9
A
298 } else if (kCFStringEncodingUTF8 == encoding) {
299 if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
300 chars += 3;
301 len -= 3;
302 if (0 == len) return true;
303 }
304 if (buffer->isASCII) {
305 for (idx = 0; idx < len; idx++) {
306 if (128 <= chars[idx]) {
307 buffer->isASCII = false;
308 break;
309 }
310 }
311 }
312 if (buffer->isASCII) {
313 buffer->numChars = len;
314 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
315 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
316 if (!buffer->chars.ascii) goto memoryErrorExit;
317 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
318 } else {
319 CFIndex numDone;
320 static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
321
322 if (!__CFFromUTF8) {
323 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
324 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
325 }
326
9ce05555 327 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
bd5b749c 328 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
cf7d2af9 329 if (!buffer->chars.unicode) goto memoryErrorExit;
9ce05555
A
330 buffer->numChars = 0;
331 while (chars < end) {
cf7d2af9
A
332 numDone = 0;
333 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
334
335 if (0 == numDone) {
336 result = FALSE;
337 break;
338 }
339 buffer->numChars += numDone;
340 }
341 }
342 } else if (kCFStringEncodingNonLossyASCII == encoding) {
343 UTF16Char currentValue = 0;
344 uint8_t character;
345 int8_t mode = __NSNonLossyASCIIMode;
346
347 buffer->isASCII = false;
348 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
349 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
350 if (!buffer->chars.unicode) goto memoryErrorExit;
351 buffer->numChars = 0;
352
353 while (chars < end) {
354 character = (*chars++);
355
356 switch (mode) {
d8925383
A
357 case __NSNonLossyASCIIMode:
358 if (character == '\\') {
359 mode = __NSNonLossyBackslashMode;
360 } else if (character < 0x80) {
361 currentValue = character;
362 } else {
363 mode = __NSNonLossyErrorMode;
364 }
365 break;
cf7d2af9
A
366
367 case __NSNonLossyBackslashMode:
d8925383
A
368 if ((character == 'U') || (character == 'u')) {
369 mode = __NSNonLossyHexInitialMode;
370 currentValue = 0;
371 } else if ((character >= '0') && (character <= '9')) {
372 mode = __NSNonLossyOctalInitialMode;
373 currentValue = character - '0';
374 } else if (character == '\\') {
375 mode = __NSNonLossyASCIIMode;
376 currentValue = character;
377 } else {
378 mode = __NSNonLossyErrorMode;
379 }
380 break;
cf7d2af9
A
381
382 default:
d8925383
A
383 if (mode < __NSNonLossyHexFinalMode) {
384 if ((character >= '0') && (character <= '9')) {
385 currentValue = (currentValue << 4) | (character - '0');
386 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
387 } else {
388 if (character >= 'a') character -= ('a' - 'A');
389 if ((character >= 'A') && (character <= 'F')) {
390 currentValue = (currentValue << 4) | ((character - 'A') + 10);
391 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
392 } else {
393 mode = __NSNonLossyErrorMode;
394 }
395 }
396 } else {
397 if ((character >= '0') && (character <= '9')) {
398 currentValue = (currentValue << 3) | (character - '0');
399 if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
400 } else {
401 mode = __NSNonLossyErrorMode;
402 }
403 }
404 break;
9ce05555 405 }
cf7d2af9
A
406
407 if (mode == __NSNonLossyASCIIMode) {
408 buffer->chars.unicode[buffer->numChars++] = currentValue;
409 } else if (mode == __NSNonLossyErrorMode) {
410 break;
d8925383 411 }
cf7d2af9
A
412 }
413 result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
414 } else {
415 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
416
417 if (!converter) return false;
418
419 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
420
421 if (!isASCIISuperset) buffer->isASCII = false;
422
423 if (buffer->isASCII) {
424 for (idx = 0; idx < len; idx++) {
425 if (128 <= chars[idx]) {
426 buffer->isASCII = false;
427 break;
d8925383
A
428 }
429 }
cf7d2af9
A
430 }
431
432 if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
d8925383
A
433 if (buffer->isASCII) {
434 buffer->numChars = len;
435 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
bd5b749c 436 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
cf7d2af9 437 if (!buffer->chars.ascii) goto memoryErrorExit;
d8925383
A
438 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
439 } else {
d8925383 440 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
bd5b749c 441 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
cf7d2af9
A
442 if (!buffer->chars.unicode) goto memoryErrorExit;
443 buffer->numChars = len;
444 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
445 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
446 } else {
d8925383 447 for (idx = 0; idx < len; idx++) {
cf7d2af9
A
448 if (chars[idx] < 0x80 && isASCIISuperset) {
449 buffer->chars.unicode[idx] = (UniChar)chars[idx];
450 } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
451 result = FALSE;
d8925383 452 break;
9ce05555
A
453 }
454 }
d8925383 455 }
cf7d2af9
A
456 }
457 } else {
458 if (buffer->isASCII) {
459 buffer->numChars = len;
460 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
461 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
462 if (!buffer->chars.ascii) goto memoryErrorExit;
463 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
d8925383 464 } else {
cf7d2af9
A
465 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
466 static UInt32 lossyFlag = (UInt32)-1;
467
468 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
469 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
470 if (!buffer->chars.unicode) goto memoryErrorExit;
471
8ca704e1 472 if (lossyFlag == (UInt32)-1) lossyFlag = 0;
cf7d2af9
A
473
474 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
9ce05555 475 }
9ce05555
A
476 }
477 }
d8925383 478
cf7d2af9
A
479 if (FALSE == result) {
480memoryErrorExit: // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
481 result = FALSE; // In case we come here from a goto
482 if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
483 buffer->isASCII = !alwaysUnicode;
484 buffer->shouldFreeChars = false;
485 buffer->chars.ascii = NULL;
486 buffer->numChars = 0;
487 }
488 return result;
9ce05555
A
489}
490
491
492/* Create a byte stream from a CFString backing. Can convert a string piece at a time
493 into a fixed size buffer. Returns number of characters converted.
494 Characters that cannot be converted to the specified encoding are represented
495 with the char specified by lossByte; if 0, then lossy conversion is not allowed
496 and conversion stops, returning partial results.
497 Pass buffer==NULL if you don't care about the converted string (but just the convertability,
498 or number of bytes required, indicated by usedBufLen).
499 Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
500
501 Note: This function is intended to work through CFString functions, so it should work
502 with NSStrings as well as CFStrings.
503*/
504CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
505 CFIndex totalBytesWritten = 0; /* Number of written bytes */
506 CFIndex numCharsProcessed = 0; /* Number of processed chars */
507 const UniChar *unichars;
508
509 if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
510 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
511
512 if (!__CFToUTF8) {
513 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
514 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
515 }
516 numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
517
518 } else if (encoding == kCFStringEncodingNonLossyASCII) {
519 const char *hex = "0123456789abcdef";
520 UniChar ch;
521 CFStringInlineBuffer buf;
522 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
523 while (numCharsProcessed < rangeLen) {
524 CFIndex reqLength; /* Required number of chars to encode this UniChar */
525 CFIndex cnt;
526 char tmp[6];
527 ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
528 if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
529 reqLength = 1;
bd5b749c 530 tmp[0] = (char)ch;
9ce05555
A
531 } else {
532 if (ch == '\\') {
533 tmp[1] = '\\';
534 reqLength = 2;
535 } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
536 tmp[1] = '0' + (ch >> 6);
537 tmp[2] = '0' + ((ch >> 3) & 7);
538 tmp[3] = '0' + (ch & 7);
539 reqLength = 4;
540 } else { /* \Unnnn */
541 tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
542 tmp[2] = hex[(ch >> 12) & 0x0f];
543 tmp[3] = hex[(ch >> 8) & 0x0f];
544 tmp[4] = hex[(ch >> 4) & 0x0f];
545 tmp[5] = hex[ch & 0x0f];
546 reqLength = 6;
547 }
548 tmp[0] = '\\';
549 }
550 if (buffer) {
551 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
552.*/
553 for (cnt = 0; cnt < reqLength; cnt++) {
554 buffer[totalBytesWritten + cnt] = tmp[cnt];
555 }
556 }
557 totalBytesWritten += reqLength;
558 numCharsProcessed++;
559 }
d8925383
A
560 } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
561 CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
9ce05555
A
562 numCharsProcessed = rangeLen;
563 if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
564 numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
565 }
566 totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
567 if (buffer) {
d8925383 568 if (extraForBOM) { /* Generate BOM */
bd5b749c 569#if __CF_BIG_ENDIAN__
9ce05555
A
570 *buffer++ = 0xfe; *buffer++ = 0xff;
571#else
572 *buffer++ = 0xff; *buffer++ = 0xfe;
573#endif
574 }
575 CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
bd5b749c 576 if ((__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
d8925383
A
577 UTF16Char *characters = (UTF16Char *)buffer;
578 const UTF16Char *limit = characters + numCharsProcessed;
579
580 while (characters < limit) {
581 *characters = CFSwapInt16(*characters);
582 ++characters;
583 }
584 }
9ce05555 585 }
d8925383
A
586 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
587 UTF32Char character;
588 CFStringInlineBuffer buf;
589 UTF32Char *characters = (UTF32Char *)buffer;
590
bd5b749c 591 bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
d8925383
A
592 if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
593 totalBytesWritten += sizeof(UTF32Char);
594 if (characters) {
595 if (totalBytesWritten > max) { // insufficient buffer
596 totalBytesWritten = 0;
597 } else {
d8925383 598 *(characters++) = 0x0000FEFF;
d8925383
A
599 }
600 }
601 }
602
603 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
604 while (numCharsProcessed < rangeLen) {
605 character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
606
607 if (CFUniCharIsSurrogateHighCharacter(character)) {
608 UTF16Char otherCharacter;
609
610 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
611 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
612 } else if (lossByte) {
613 character = lossByte;
614 } else {
615 break;
616 }
617 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
618 if (lossByte) {
619 character = lossByte;
620 } else {
621 break;
622 }
623 }
624
625 totalBytesWritten += sizeof(UTF32Char);
626
627 if (characters) {
628 if (totalBytesWritten > max) {
629 totalBytesWritten -= sizeof(UTF32Char);
630 break;
631 }
632 *(characters++) = (swap ? CFSwapInt32(character) : character);
633 }
634
635 numCharsProcessed += (character > 0xFFFF ? 2 : 1);
636 }
9ce05555
A
637 } else {
638 CFIndex numChars;
639 UInt32 flags;
640 const unsigned char *cString = NULL;
bd5b749c 641 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
9ce05555 642
cf7d2af9
A
643 if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
644
d8925383 645 if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
9ce05555 646 const unsigned char *ptr;
bd5b749c 647 if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
9ce05555
A
648 ptr = (cString += rangeLoc);
649 if (__CFStringGetEightBitStringEncoding() == encoding) {
650 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
651 if (buffer) memmove(buffer, cString, numCharsProcessed);
652 if (usedBufLen) *usedBufLen = numCharsProcessed;
653 return numCharsProcessed;
654 }
cf7d2af9
A
655
656 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
657 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
9ce05555
A
658 ++ptr;
659 --rangeLen;
660 }
661 numCharsProcessed = ptr - cString;
662 if (buffer) {
663 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
664 memmove(buffer, cString, numCharsProcessed);
665 buffer += numCharsProcessed;
666 max -= numCharsProcessed;
667 }
668 if (!rangeLen || (buffer && (max == 0))) {
669 if (usedBufLen) *usedBufLen = numCharsProcessed;
670 return numCharsProcessed;
671 }
672 rangeLoc += numCharsProcessed;
673 totalBytesWritten += numCharsProcessed;
674 }
675 if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
676 ptr = (cString += (rangeLoc + 1));
677 if (__CFStringGetEightBitStringEncoding() == encoding) {
678 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
679 if (buffer) memmove(buffer, cString, numCharsProcessed);
680 if (usedBufLen) *usedBufLen = numCharsProcessed;
681 return numCharsProcessed;
682 }
683 while (*ptr < 0x80 && rangeLen > 0) {
684 ++ptr;
685 --rangeLen;
686 }
687 numCharsProcessed = ptr - cString;
688 if (buffer) {
689 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
690 memmove(buffer, cString, numCharsProcessed);
691 buffer += numCharsProcessed;
692 max -= numCharsProcessed;
693 }
694 if (!rangeLen || (buffer && (max == 0))) {
695 if (usedBufLen) *usedBufLen = numCharsProcessed;
696 return numCharsProcessed;
697 }
698 rangeLoc += numCharsProcessed;
699 totalBytesWritten += numCharsProcessed;
700 }
701 }
702
703 if (!buffer) max = 0;
704
705 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
d8925383
A
706 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
707 flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
9ce05555 708
bd5b749c 709 if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
cf7d2af9 710 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
9ce05555
A
711 } else {
712 UniChar charBuf[kCFCharConversionBufferLength];
bd5b749c
A
713 CFIndex currentLength;
714 CFIndex usedLen;
715 CFIndex lastUsedLen = 0, lastNumChars = 0;
9ce05555 716 uint32_t result;
cf7d2af9
A
717 uint32_t streamingMask;
718 uint32_t streamID = 0;
9ce05555
A
719#define MAX_DECOMP_LEN (6)
720
721 while (rangeLen > 0) {
722 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
723 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
724
725 // could be in the middle of surrogate pair; back up.
726 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
727
cf7d2af9
A
728 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
729
730 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
731 streamID = CFStringEncodingStreamIDFromMask(result);
732 result &= ~CFStringEncodingStreamIDMask;
733
734 if (result != kCFStringEncodingConversionSuccess) {
735 if (kCFStringEncodingInvalidInputStream == result) {
736 CFRange composedRange;
737 // Check the tail
738 if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
739 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
740
741 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
742 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
743 streamID = CFStringEncodingStreamIDFromMask(result);
744 result &= ~CFStringEncodingStreamIDMask;
9ce05555 745 }
cf7d2af9
A
746 }
747
748 // Check the head
749 if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
750 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
751
752 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
753 // Try if the composed range can be converted
754 CFStringGetCharacters(string, composedRange, charBuf);
755
756 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
757 CFIndex lastRangeLoc = rangeLoc - lastNumChars;
758
759 currentLength = composedRange.location - lastRangeLoc;
760 CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
761
762 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
763 streamID = CFStringEncodingStreamIDFromMask(result);
764 result &= ~CFStringEncodingStreamIDMask;
765
766 if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
767 // Looks good. back up
768 totalBytesWritten -= lastUsedLen;
769 numCharsProcessed -= lastNumChars;
770
771 rangeLoc = lastRangeLoc;
772 rangeLen += lastNumChars;
773
774 if (max) {
775 buffer -= lastUsedLen;
776 max += lastUsedLen;
9ce05555
A
777 }
778 }
779 }
780 }
781 }
9ce05555 782 }
cf7d2af9
A
783
784 if (kCFStringEncodingConversionSuccess != result) { // really failed
785 totalBytesWritten += usedLen;
786 numCharsProcessed += numChars;
787 break;
788 }
9ce05555 789 }
cf7d2af9 790
9ce05555
A
791 totalBytesWritten += usedLen;
792 numCharsProcessed += numChars;
793
794 rangeLoc += numChars;
795 rangeLen -= numChars;
796 if (max) {
797 buffer += usedLen;
798 max -= usedLen;
799 if (max <= 0) break;
800 }
801 lastUsedLen = usedLen; lastNumChars = numChars;
802 flags &= ~kCFStringEncodingPrependBOM;
803 }
804 }
805 }
806 if (usedBufLen) *usedBufLen = totalBytesWritten;
807 return numCharsProcessed;
808}
809
d8925383
A
810CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
811 return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
812}
813
814CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
815 CFIndex len = CFStringGetLength(string);
816 CFStringEncoding enc = CFStringGetFastestEncoding(string);
817 switch (enc) {
818 case kCFStringEncodingASCII:
819 case kCFStringEncodingMacRoman:
cf7d2af9
A
820 if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound; // Avoid wrap-around
821 return len * 3L + 1L;
d8925383 822 default:
cf7d2af9
A
823 if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound; // Avoid wrap-around
824 return len * 9L + 1L;
d8925383
A
825 }
826}
827
828Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
cf7d2af9 829#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
d8925383 830#define MAX_STACK_BUFFER_LEN (255)
9ce05555 831 const UTF16Char *characters = CFStringGetCharactersPtr(string);
8ca704e1 832 const char *origBuffer = buffer;
bd5b749c 833 const char *bufferLimit = buffer + maxBufLen;
0ae65c4b 834 CFIndex length = CFStringGetLength(string);
bd5b749c 835 CFIndex usedBufLen;
9ce05555 836
0ae65c4b 837 if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
9ce05555 838
0ae65c4b 839 if (NULL == characters) {
bd5b749c
A
840 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
841 CFRange range = CFRangeMake(0, 0);
842 const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
9ce05555 843
bd5b749c
A
844 if (NULL != bytes) {
845 const char *originalBytes = bytes;
846 const char *bytesLimit = bytes + length;
9ce05555 847
bd5b749c 848 while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
9ce05555 849
bd5b749c
A
850 range.location = bytes - originalBytes;
851 }
852 while ((range.location < length) && (buffer < bufferLimit)) {
853 range.length = length - range.location;
854 if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
9ce05555 855
bd5b749c
A
856 CFStringGetCharacters(string, range, charactersBuffer);
857 if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
858
859 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
9ce05555 860
9ce05555 861 buffer += usedBufLen;
bd5b749c 862 range.location += range.length;
9ce05555
A
863 }
864 } else {
0ae65c4b 865 if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
9ce05555
A
866 buffer += usedBufLen;
867 }
868
bd5b749c 869 if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
9ce05555 870 *buffer = '\0';
8ca704e1
A
871 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionLion)) {
872 while (origBuffer < buffer) if (*origBuffer++ == 0) { // There's a zero in there. Now see if the rest are all zeroes.
873 while (origBuffer < buffer) if (*origBuffer++ != 0) return false; // Embedded NULLs should cause failure: <rdar://problem/5863219>
874 }
875 }
9ce05555
A
876 return true;
877 } else {
878 return false;
879 }
cf7d2af9 880#else
9ce05555 881 return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
cf7d2af9 882#endif
9ce05555 883}
d8925383
A
884
885Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
bd5b749c 886 return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
d8925383
A
887}
888
cf7d2af9
A
889
890#if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
891
892/* This function is used to obtain users' default script/region code.
893 The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
894*/
895void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
896 char *stringValue;
897 char buffer[__kCFMaxDefaultEncodingFileLength];
898 int uid = getuid();
899
8ca704e1 900 if ((stringValue = (char *)__CFgetenv(__kCFUserEncodingEnvVariableName)) != NULL) {
cf7d2af9
A
901 if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
902 ++stringValue;
903 } else {
904 stringValue = NULL;
905 }
906 }
907
8ca704e1 908 if ((stringValue == NULL) && ((uid > 0) || __CFgetenv("HOME"))) {
cf7d2af9
A
909 char passwdExtraBuf[1000 + MAXPATHLEN]; // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
910 struct passwd passwdBuf, *passwdp = NULL;
911
912 switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
913 case 0: // Success
914 break;
915 case ERANGE: // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
916 passwdp = getpwuid((uid_t)uid);
917 break;
918 default:
919 passwdp = NULL;
920 }
921 if (passwdp) {
922 char filename[MAXPATHLEN + 1];
923
924 const char *path = NULL;
925 if (!issetugid()) {
8ca704e1 926 path = __CFgetenv("CFFIXED_USER_HOME");
cf7d2af9
A
927 }
928 if (!path) {
929 path = passwdp->pw_dir;
930 }
931
932 strlcpy(filename, path, sizeof(filename));
933 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
934
935 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
936 int fd = open(filename, O_RDONLY, 0);
937 if (fd == -1) {
938 // Cannot open the file. Let's fallback to smRoman/verUS
939 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
940 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
941 } else {
8ca704e1 942 ssize_t readSize;
cf7d2af9
A
943 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
944 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
945 close(fd);
946 stringValue = buffer;
947
948 // Well, we already have a buffer, let's reuse it
949 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
950 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
951 }
952 if (-1 != no_hang_fd) close(no_hang_fd);
953 }
954 }
955
956 if (stringValue) {
957 *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
db04bbf9
A
958 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew
959 if ((*oScriptValue == kCFStringEncodingMacArabic) || (*oScriptValue == kCFStringEncodingMacHebrew)) *oScriptValue = kCFStringEncodingMacRoman;
cf7d2af9
A
960 if (*stringValue == ':') {
961 if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
962 return;
963 }
964 }
965
966 // Falling back
967 *oScriptValue = 0; // smRoman
968 if (oRegionValue) *oRegionValue = 0; // verUS
969}
970
971void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
972 char buffer[__kCFMaxDefaultEncodingFileLength];
973 char *stringValue = NULL;
974
975 *encoding = 0;
976 *region = 0;
977
978 struct passwd *passwdp = getpwuid((uid_t)0);
979 if (passwdp) {
980 const char *path = passwdp->pw_dir;
981
982 char filename[MAXPATHLEN + 1];
983 strlcpy(filename, path, sizeof(filename));
984 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
985
986 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
987 int fd = open(filename, O_RDONLY, 0);
988 if (0 <= fd) {
8ca704e1 989 ssize_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
cf7d2af9
A
990 buffer[(size < 0 ? 0 : size)] = '\0';
991 close(fd);
992 stringValue = buffer;
993 }
994 if (-1 != no_hang_fd) close(no_hang_fd);
995 }
996
997 if (stringValue) {
998 *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
db04bbf9
A
999 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew
1000 if ((*encoding == kCFStringEncodingMacArabic) || (*encoding == kCFStringEncodingMacHebrew)) *encoding = kCFStringEncodingMacRoman;
cf7d2af9
A
1001 if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
1002 }
1003}
1004
1005Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1006 Boolean success = false;
1007 struct passwd *passwdp = getpwuid(getuid());
1008 if (passwdp) {
1009 const char *path = passwdp->pw_dir;
1010 if (!issetugid()) {
8ca704e1 1011 const char *value = __CFgetenv("CFFIXED_USER_HOME");
cf7d2af9
A
1012 if (value) path = value; // override
1013 }
1014
1015 char filename[MAXPATHLEN + 1];
1016 strlcpy(filename, path, sizeof(filename));
1017 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1018
1019 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1020 (void)unlink(filename);
1021 int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1022 if (0 <= fd) {
1023 char buffer[__kCFMaxDefaultEncodingFileLength];
db04bbf9
A
1024 // We force using MacRoman for Arabic/Hebrew users <rdar://problem/17633551> When changing language to Arabic and Hebrew, set the default user encoding to MacRoman, not MacArabic/MacHebrew
1025 if ((iScriptValue == kCFStringEncodingMacArabic) || (iScriptValue == kCFStringEncodingMacHebrew)) iScriptValue = kCFStringEncodingMacRoman;
cf7d2af9
A
1026 size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1027 if (size <= __kCFMaxDefaultEncodingFileLength) {
1028 int ret = write(fd, buffer, size);
1029 if (size <= ret) success = true;
1030 }
1031 int save_err = errno;
1032 close(fd);
1033 errno = save_err;
1034 }
1035 int save_err = errno;
1036 if (-1 != no_hang_fd) close(no_hang_fd);
1037 errno = save_err;
1038 }
1039 return success;
1040}
1041
1042#endif
1043