]> git.saurik.com Git - apple/cf.git/blame - CFStringEncodings.c
CF-550.19.tar.gz
[apple/cf.git] / CFStringEncodings.c
CommitLineData
9ce05555 1/*
e588f561 2 * Copyright (c) 2010 Apple Inc. All rights reserved.
9ce05555
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
9ce05555
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
f64f9b69 23
9ce05555 24/* CFStringEncodings.c
cf7d2af9 25 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
9ce05555
A
26 Responsibility: Aki Inoue
27*/
28
29#include "CFInternal.h"
30#include <CoreFoundation/CFString.h>
31#include <CoreFoundation/CFByteOrder.h>
cf7d2af9 32#include <CoreFoundation/CFPriv.h>
9ce05555 33#include <string.h>
cf7d2af9
A
34#include <CoreFoundation/CFStringEncodingConverterExt.h>
35#include <CoreFoundation/CFUniChar.h>
36#include <CoreFoundation/CFUnicodeDecomposition.h>
37#if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
38#include <stdlib.h>
39#include <fcntl.h>
40#include <pwd.h>
41#include <sys/param.h>
42#include <unistd.h>
43#include <string.h>
44#include <stdio.h>
45#include <xlocale.h>
46#include <CoreFoundation/CFStringDefaultEncoding.h>
47#endif
9ce05555
A
48
49static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
50CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
51 if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
d8925383 52 __CFWantsToUseASCIICompatibleConversion = false;
9ce05555
A
53 }
54 return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
55}
56
57void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
58 __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
59}
60
61Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
62
63// To avoid early initialization issues, we just initialize this here
64// This should not be const as it is changed
cf7d2af9 65__private_extern__ UniChar __CFCharToUniCharTable[256] = {
9ce05555
A
66 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
67 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
68 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
69 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
70 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
71 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
72 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
73112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
74128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
75144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
76160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
77176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
78192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
79208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
80224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
81240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
82};
83
cf7d2af9 84__private_extern__ void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
9ce05555
A
85 if (__CFCharToUniCharFunc != func) {
86 int ch;
87 __CFCharToUniCharFunc = func;
88 if (func) {
89 for (ch = 128; ch < 256; ch++) {
90 UniChar uch;
91 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
92 }
93 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
94 for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
95 }
96 }
97}
98
99__private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
100 CFIndex idx;
101 for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
102}
103
104
/* The minimum length the output buffers should be in the above functions. */
#define kCFCharConversionBufferLength 512


/* Capacity of the inline storage, counted in 8-bit characters and in UniChars
   respectively. Both macros expand against a pointer named `buffer` that must be
   in scope at the point of use and must have a `localBuffer` array member.
   The results have type size_t. */
#define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))
112
9ce05555
A
/* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. A false return indicates an error occurred during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
9/18/98 __CFStringDecodeByteStream now avoids allocating a buffer if buffer->chars is not NULL
Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
__CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
!!! converterFlags is only used for the UTF8 converter at this point
*/
119Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
120 return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
121}
122
/* States of the kCFStringEncodingNonLossyASCII decoder. The hex and octal
   states are counters: the decoder increments the mode once per digit consumed
   and returns to __NSNonLossyASCIIMode when the matching "final" value is reached. */
enum {
    __NSNonLossyErrorMode = -1,                                         /* malformed input */
    __NSNonLossyASCIIMode = 0,                                          /* between escapes */
    __NSNonLossyBackslashMode = 1,                                      /* just consumed a backslash */
    __NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,         /* after "\u" or "\U" */
    __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,          /* reached after four hex digits */
    __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,        /* after backslash + first digit */
    __NSNonLossyOctalFinalMode = __NSNonLossyOctalInitialMode + 2       /* reached after two more digits */
};
132
bd5b749c 133Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
cf7d2af9
A
134 CFIndex idx;
135 const uint8_t *chars = (const uint8_t *)bytes;
136 const uint8_t *end = chars + len;
137 Boolean result = TRUE;
9ce05555
A
138
139 if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
140
141 buffer->isASCII = !alwaysUnicode;
142 buffer->shouldFreeChars = false;
143 buffer->numChars = 0;
d8925383 144
9ce05555
A
145 if (0 == len) return true;
146
147 buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
d8925383
A
148
149 if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) { // UTF-16
150 const UTF16Char *src = (const UTF16Char *)bytes;
151 const UTF16Char *limit = (const UTF16Char *)(bytes + len);
152 bool swap = false;
153
154 if (kCFStringEncodingUTF16 == encoding) {
155 UTF16Char bom = ((*src == 0xFFFE) || (*src == 0xFEFF) ? *(src++) : 0);
156
bd5b749c 157#if __CF_BIG_ENDIAN__
d8925383
A
158 if (bom == 0xFFFE) swap = true;
159#else
160 if (bom != 0xFEFF) swap = true;
161#endif
162 if (bom) useClientsMemoryPtr = NULL;
163 } else {
bd5b749c 164#if __CF_BIG_ENDIAN__
d8925383
A
165 if (kCFStringEncodingUTF16LE == encoding) swap = true;
166#else
167 if (kCFStringEncodingUTF16BE == encoding) swap = true;
168#endif
9ce05555
A
169 }
170
d8925383
A
171 buffer->numChars = limit - src;
172
173 if (useClientsMemoryPtr && !swap) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
174 *useClientsMemoryPtr = true;
175 buffer->chars.unicode = (UniChar *)src;
176 buffer->isASCII = false;
9ce05555 177 } else {
d8925383
A
178 if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
179 const UTF16Char *characters = src;
180 UTF16Char mask = (swap ? 0x80FF : 0xFF80);
181
182 while (characters < limit) {
183 if (*(characters++) & mask) {
184 buffer->isASCII = false;
185 break;
186 }
9ce05555
A
187 }
188 }
d8925383
A
189
190 if (buffer->isASCII) {
191 uint8_t *dst;
192 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
193 if (buffer->numChars > MAX_LOCAL_CHARS) {
bd5b749c 194 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
cf7d2af9 195 if (!buffer->chars.ascii) goto memoryErrorExit;
d8925383
A
196 buffer->shouldFreeChars = true;
197 } else {
198 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
199 }
9ce05555 200 }
d8925383
A
201 dst = buffer->chars.ascii;
202
203 if (swap) {
204 while (src < limit) *(dst++) = (*(src++) >> 8);
9ce05555 205 } else {
bd5b749c 206 while (src < limit) *(dst++) = (uint8_t)*(src++);
9ce05555 207 }
d8925383
A
208 } else {
209 UTF16Char *dst;
9ce05555 210
d8925383
A
211 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
212 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
bd5b749c 213 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
cf7d2af9 214 if (!buffer->chars.unicode) goto memoryErrorExit;
d8925383 215 buffer->shouldFreeChars = true;
9ce05555 216 } else {
d8925383 217 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
9ce05555 218 }
d8925383
A
219 }
220 dst = buffer->chars.unicode;
221
222 if (swap) {
223 while (src < limit) *(dst++) = CFSwapInt16(*(src++));
9ce05555 224 } else {
d8925383 225 memmove(dst, src, buffer->numChars * sizeof(UTF16Char));
9ce05555 226 }
9ce05555 227 }
d8925383
A
228 }
229 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
230 const UTF32Char *src = (const UTF32Char *)bytes;
231 const UTF32Char *limit = (const UTF32Char *)(bytes + len);
232 bool swap = false;
bd5b749c
A
233 static bool strictUTF32 = (bool)-1;
234
235 if ((bool)-1 == strictUTF32) strictUTF32 = (_CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) != 0);
9ce05555 236
d8925383
A
237 if (kCFStringEncodingUTF32 == encoding) {
238 UTF32Char bom = ((*src == 0xFFFE0000) || (*src == 0x0000FEFF) ? *(src++) : 0);
9ce05555 239
bd5b749c 240#if __CF_BIG_ENDIAN__
d8925383
A
241 if (bom == 0xFFFE0000) swap = true;
242#else
243 if (bom != 0x0000FEFF) swap = true;
244#endif
245 } else {
bd5b749c 246#if __CF_BIG_ENDIAN__
d8925383
A
247 if (kCFStringEncodingUTF32LE == encoding) swap = true;
248#else
249 if (kCFStringEncodingUTF32BE == encoding) swap = true;
250#endif
9ce05555 251 }
d8925383
A
252
253 buffer->numChars = limit - src;
254
255 {
256 // Let's see if we have non-ASCII or non-BMP
257 const UTF32Char *characters = src;
258 UTF32Char asciiMask = (swap ? 0x80FFFFFF : 0xFFFFFF80);
259 UTF32Char bmpMask = (swap ? 0x0000FFFF : 0xFFFF0000);
260
261 while (characters < limit) {
262 if (*characters & asciiMask) {
263 buffer->isASCII = false;
bd5b749c 264 if (*characters & bmpMask) {
cf7d2af9 265 if (strictUTF32 && ((swap ? (UTF32Char)CFSwapInt32(*characters) : *characters) > 0x10FFFF)) return false; // outside of Unicode Scaler Value. Haven't allocated buffer, yet.
bd5b749c
A
266 ++(buffer->numChars);
267 }
9ce05555 268 }
d8925383 269 ++characters;
9ce05555
A
270 }
271 }
d8925383
A
272
273 if (buffer->isASCII) {
274 uint8_t *dst;
275 if (NULL == buffer->chars.ascii) { // we never reallocate when buffer is supplied
276 if (buffer->numChars > MAX_LOCAL_CHARS) {
bd5b749c 277 buffer->chars.ascii = (UInt8 *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(uint8_t)), 0);
cf7d2af9 278 if (!buffer->chars.ascii) goto memoryErrorExit;
d8925383
A
279 buffer->shouldFreeChars = true;
280 } else {
281 buffer->chars.ascii = (uint8_t *)buffer->localBuffer;
282 }
9ce05555 283 }
d8925383 284 dst = buffer->chars.ascii;
9ce05555 285
d8925383
A
286 if (swap) {
287 while (src < limit) *(dst++) = (*(src++) >> 24);
288 } else {
289 while (src < limit) *(dst++) = *(src++);
290 }
291 } else {
292 if (NULL == buffer->chars.unicode) { // we never reallocate when buffer is supplied
293 if (buffer->numChars > MAX_LOCAL_UNICHARS) {
bd5b749c 294 buffer->chars.unicode = (UniChar *)CFAllocatorAllocate(buffer->allocator, (buffer->numChars * sizeof(UTF16Char)), 0);
cf7d2af9 295 if (!buffer->chars.unicode) goto memoryErrorExit;
d8925383
A
296 buffer->shouldFreeChars = true;
297 } else {
298 buffer->chars.unicode = (UTF16Char *)buffer->localBuffer;
299 }
300 }
cf7d2af9 301 result = (CFUniCharFromUTF32(src, limit - src, buffer->chars.unicode, (strictUTF32 ? false : true), __CF_BIG_ENDIAN__ ? !swap : swap) ? TRUE : FALSE);
d8925383 302 }
cf7d2af9
A
303 } else if (kCFStringEncodingUTF8 == encoding) {
304 if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
305 chars += 3;
306 len -= 3;
307 if (0 == len) return true;
308 }
309 if (buffer->isASCII) {
310 for (idx = 0; idx < len; idx++) {
311 if (128 <= chars[idx]) {
312 buffer->isASCII = false;
313 break;
314 }
315 }
316 }
317 if (buffer->isASCII) {
318 buffer->numChars = len;
319 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
320 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
321 if (!buffer->chars.ascii) goto memoryErrorExit;
322 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
323 } else {
324 CFIndex numDone;
325 static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
326
327 if (!__CFFromUTF8) {
328 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
329 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
330 }
331
9ce05555 332 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
bd5b749c 333 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
cf7d2af9 334 if (!buffer->chars.unicode) goto memoryErrorExit;
9ce05555
A
335 buffer->numChars = 0;
336 while (chars < end) {
cf7d2af9
A
337 numDone = 0;
338 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
339
340 if (0 == numDone) {
341 result = FALSE;
342 break;
343 }
344 buffer->numChars += numDone;
345 }
346 }
347 } else if (kCFStringEncodingNonLossyASCII == encoding) {
348 UTF16Char currentValue = 0;
349 uint8_t character;
350 int8_t mode = __NSNonLossyASCIIMode;
351
352 buffer->isASCII = false;
353 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
354 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
355 if (!buffer->chars.unicode) goto memoryErrorExit;
356 buffer->numChars = 0;
357
358 while (chars < end) {
359 character = (*chars++);
360
361 switch (mode) {
d8925383
A
362 case __NSNonLossyASCIIMode:
363 if (character == '\\') {
364 mode = __NSNonLossyBackslashMode;
365 } else if (character < 0x80) {
366 currentValue = character;
367 } else {
368 mode = __NSNonLossyErrorMode;
369 }
370 break;
cf7d2af9
A
371
372 case __NSNonLossyBackslashMode:
d8925383
A
373 if ((character == 'U') || (character == 'u')) {
374 mode = __NSNonLossyHexInitialMode;
375 currentValue = 0;
376 } else if ((character >= '0') && (character <= '9')) {
377 mode = __NSNonLossyOctalInitialMode;
378 currentValue = character - '0';
379 } else if (character == '\\') {
380 mode = __NSNonLossyASCIIMode;
381 currentValue = character;
382 } else {
383 mode = __NSNonLossyErrorMode;
384 }
385 break;
cf7d2af9
A
386
387 default:
d8925383
A
388 if (mode < __NSNonLossyHexFinalMode) {
389 if ((character >= '0') && (character <= '9')) {
390 currentValue = (currentValue << 4) | (character - '0');
391 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
392 } else {
393 if (character >= 'a') character -= ('a' - 'A');
394 if ((character >= 'A') && (character <= 'F')) {
395 currentValue = (currentValue << 4) | ((character - 'A') + 10);
396 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
397 } else {
398 mode = __NSNonLossyErrorMode;
399 }
400 }
401 } else {
402 if ((character >= '0') && (character <= '9')) {
403 currentValue = (currentValue << 3) | (character - '0');
404 if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
405 } else {
406 mode = __NSNonLossyErrorMode;
407 }
408 }
409 break;
9ce05555 410 }
cf7d2af9
A
411
412 if (mode == __NSNonLossyASCIIMode) {
413 buffer->chars.unicode[buffer->numChars++] = currentValue;
414 } else if (mode == __NSNonLossyErrorMode) {
415 break;
d8925383 416 }
cf7d2af9
A
417 }
418 result = ((mode == __NSNonLossyASCIIMode) ? YES : NO);
419 } else {
420 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
421
422 if (!converter) return false;
423
424 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
425
426 if (!isASCIISuperset) buffer->isASCII = false;
427
428 if (buffer->isASCII) {
429 for (idx = 0; idx < len; idx++) {
430 if (128 <= chars[idx]) {
431 buffer->isASCII = false;
432 break;
d8925383
A
433 }
434 }
cf7d2af9
A
435 }
436
437 if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
d8925383
A
438 if (buffer->isASCII) {
439 buffer->numChars = len;
440 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
bd5b749c 441 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
cf7d2af9 442 if (!buffer->chars.ascii) goto memoryErrorExit;
d8925383
A
443 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
444 } else {
d8925383 445 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
bd5b749c 446 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
cf7d2af9
A
447 if (!buffer->chars.unicode) goto memoryErrorExit;
448 buffer->numChars = len;
449 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
450 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
451 } else {
d8925383 452 for (idx = 0; idx < len; idx++) {
cf7d2af9
A
453 if (chars[idx] < 0x80 && isASCIISuperset) {
454 buffer->chars.unicode[idx] = (UniChar)chars[idx];
455 } else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx)) {
456 result = FALSE;
d8925383 457 break;
9ce05555
A
458 }
459 }
d8925383 460 }
cf7d2af9
A
461 }
462 } else {
463 if (buffer->isASCII) {
464 buffer->numChars = len;
465 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
466 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : (UInt8 *)CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
467 if (!buffer->chars.ascii) goto memoryErrorExit;
468 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
d8925383 469 } else {
cf7d2af9
A
470 CFIndex guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
471 static UInt32 lossyFlag = (UInt32)-1;
472
473 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
474 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : (UniChar *)CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
475 if (!buffer->chars.unicode) goto memoryErrorExit;
476
477 if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
478
479 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) result = FALSE;
9ce05555 480 }
9ce05555
A
481 }
482 }
d8925383 483
cf7d2af9
A
484 if (FALSE == result) {
485memoryErrorExit: // Added for <rdar://problem/6581621>, but it's not clear whether an exception would be a better option
486 result = FALSE; // In case we come here from a goto
487 if (buffer->shouldFreeChars && buffer->chars.unicode) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
488 buffer->isASCII = !alwaysUnicode;
489 buffer->shouldFreeChars = false;
490 buffer->chars.ascii = NULL;
491 buffer->numChars = 0;
492 }
493 return result;
9ce05555
A
494}
495
496
/* Create a byte stream from a CFString backing. Can convert a string piece at a time
   into a fixed size buffer. Returns number of characters converted.
   Characters that cannot be converted to the specified encoding are represented
   with the char specified by lossByte; if 0, then lossy conversion is not allowed
   and conversion stops, returning partial results.
   Pass buffer==NULL if you don't care about the converted string (but just the convertibility,
   or number of bytes required, indicated by usedBufLen).
   Does not zero-terminate. If you want to create a Pascal or C string, allow one extra byte at start or end.

   Note: This function is intended to work through CFString functions, so it should work
   with NSStrings as well as CFStrings.
*/
509CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
510 CFIndex totalBytesWritten = 0; /* Number of written bytes */
511 CFIndex numCharsProcessed = 0; /* Number of processed chars */
512 const UniChar *unichars;
513
514 if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
515 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
516
517 if (!__CFToUTF8) {
518 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
519 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
520 }
521 numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
522
523 } else if (encoding == kCFStringEncodingNonLossyASCII) {
524 const char *hex = "0123456789abcdef";
525 UniChar ch;
526 CFStringInlineBuffer buf;
527 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
528 while (numCharsProcessed < rangeLen) {
529 CFIndex reqLength; /* Required number of chars to encode this UniChar */
530 CFIndex cnt;
531 char tmp[6];
532 ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
533 if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
534 reqLength = 1;
bd5b749c 535 tmp[0] = (char)ch;
9ce05555
A
536 } else {
537 if (ch == '\\') {
538 tmp[1] = '\\';
539 reqLength = 2;
540 } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
541 tmp[1] = '0' + (ch >> 6);
542 tmp[2] = '0' + ((ch >> 3) & 7);
543 tmp[3] = '0' + (ch & 7);
544 reqLength = 4;
545 } else { /* \Unnnn */
546 tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
547 tmp[2] = hex[(ch >> 12) & 0x0f];
548 tmp[3] = hex[(ch >> 8) & 0x0f];
549 tmp[4] = hex[(ch >> 4) & 0x0f];
550 tmp[5] = hex[ch & 0x0f];
551 reqLength = 6;
552 }
553 tmp[0] = '\\';
554 }
555 if (buffer) {
556 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
557.*/
558 for (cnt = 0; cnt < reqLength; cnt++) {
559 buffer[totalBytesWritten + cnt] = tmp[cnt];
560 }
561 }
562 totalBytesWritten += reqLength;
563 numCharsProcessed++;
564 }
d8925383
A
565 } else if ((encoding == kCFStringEncodingUTF16) || (encoding == kCFStringEncodingUTF16BE) || (encoding == kCFStringEncodingUTF16LE)) {
566 CFIndex extraForBOM = (generatingExternalFile && (encoding == kCFStringEncodingUTF16) ? sizeof(UniChar) : 0);
9ce05555
A
567 numCharsProcessed = rangeLen;
568 if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
569 numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
570 }
571 totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
572 if (buffer) {
d8925383 573 if (extraForBOM) { /* Generate BOM */
bd5b749c 574#if __CF_BIG_ENDIAN__
9ce05555
A
575 *buffer++ = 0xfe; *buffer++ = 0xff;
576#else
577 *buffer++ = 0xff; *buffer++ = 0xfe;
578#endif
579 }
580 CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
bd5b749c 581 if ((__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF16LE : kCFStringEncodingUTF16BE) == encoding) { // Need to swap
d8925383
A
582 UTF16Char *characters = (UTF16Char *)buffer;
583 const UTF16Char *limit = characters + numCharsProcessed;
584
585 while (characters < limit) {
586 *characters = CFSwapInt16(*characters);
587 ++characters;
588 }
589 }
9ce05555 590 }
d8925383
A
591 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) {
592 UTF32Char character;
593 CFStringInlineBuffer buf;
594 UTF32Char *characters = (UTF32Char *)buffer;
595
bd5b749c 596 bool swap = (encoding == (__CF_BIG_ENDIAN__ ? kCFStringEncodingUTF32LE : kCFStringEncodingUTF32BE) ? true : false);
d8925383
A
597 if (generatingExternalFile && (encoding == kCFStringEncodingUTF32)) {
598 totalBytesWritten += sizeof(UTF32Char);
599 if (characters) {
600 if (totalBytesWritten > max) { // insufficient buffer
601 totalBytesWritten = 0;
602 } else {
d8925383 603 *(characters++) = 0x0000FEFF;
d8925383
A
604 }
605 }
606 }
607
608 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
609 while (numCharsProcessed < rangeLen) {
610 character = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
611
612 if (CFUniCharIsSurrogateHighCharacter(character)) {
613 UTF16Char otherCharacter;
614
615 if (((numCharsProcessed + 1) < rangeLen) && CFUniCharIsSurrogateLowCharacter((otherCharacter = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed + 1)))) {
616 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherCharacter);
617 } else if (lossByte) {
618 character = lossByte;
619 } else {
620 break;
621 }
622 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
623 if (lossByte) {
624 character = lossByte;
625 } else {
626 break;
627 }
628 }
629
630 totalBytesWritten += sizeof(UTF32Char);
631
632 if (characters) {
633 if (totalBytesWritten > max) {
634 totalBytesWritten -= sizeof(UTF32Char);
635 break;
636 }
637 *(characters++) = (swap ? CFSwapInt32(character) : character);
638 }
639
640 numCharsProcessed += (character > 0xFFFF ? 2 : 1);
641 }
9ce05555
A
642 } else {
643 CFIndex numChars;
644 UInt32 flags;
645 const unsigned char *cString = NULL;
bd5b749c 646 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
9ce05555 647
cf7d2af9
A
648 if (!CFStringEncodingIsValidEncoding(encoding)) return 0;
649
d8925383 650 if (!CF_IS_OBJC(CFStringGetTypeID(), string) && isASCIISuperset) { // Checking for NSString to avoid infinite recursion
9ce05555 651 const unsigned char *ptr;
bd5b749c 652 if ((cString = (const unsigned char *)CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
9ce05555
A
653 ptr = (cString += rangeLoc);
654 if (__CFStringGetEightBitStringEncoding() == encoding) {
655 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
656 if (buffer) memmove(buffer, cString, numCharsProcessed);
657 if (usedBufLen) *usedBufLen = numCharsProcessed;
658 return numCharsProcessed;
659 }
cf7d2af9
A
660
661 CFIndex uninterestingTailLen = buffer ? (rangeLen - MIN(max, rangeLen)) : 0;
662 while (*ptr < 0x80 && rangeLen > uninterestingTailLen) {
9ce05555
A
663 ++ptr;
664 --rangeLen;
665 }
666 numCharsProcessed = ptr - cString;
667 if (buffer) {
668 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
669 memmove(buffer, cString, numCharsProcessed);
670 buffer += numCharsProcessed;
671 max -= numCharsProcessed;
672 }
673 if (!rangeLen || (buffer && (max == 0))) {
674 if (usedBufLen) *usedBufLen = numCharsProcessed;
675 return numCharsProcessed;
676 }
677 rangeLoc += numCharsProcessed;
678 totalBytesWritten += numCharsProcessed;
679 }
680 if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
681 ptr = (cString += (rangeLoc + 1));
682 if (__CFStringGetEightBitStringEncoding() == encoding) {
683 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
684 if (buffer) memmove(buffer, cString, numCharsProcessed);
685 if (usedBufLen) *usedBufLen = numCharsProcessed;
686 return numCharsProcessed;
687 }
688 while (*ptr < 0x80 && rangeLen > 0) {
689 ++ptr;
690 --rangeLen;
691 }
692 numCharsProcessed = ptr - cString;
693 if (buffer) {
694 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
695 memmove(buffer, cString, numCharsProcessed);
696 buffer += numCharsProcessed;
697 max -= numCharsProcessed;
698 }
699 if (!rangeLen || (buffer && (max == 0))) {
700 if (usedBufLen) *usedBufLen = numCharsProcessed;
701 return numCharsProcessed;
702 }
703 rangeLoc += numCharsProcessed;
704 totalBytesWritten += numCharsProcessed;
705 }
706 }
707
708 if (!buffer) max = 0;
709
710 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
d8925383
A
711 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
712 flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | (isASCIISuperset ? 0 : __CFGetASCIICompatibleFlag());
9ce05555 713
bd5b749c 714 if (!cString && (cString = (const unsigned char *)CFStringGetCharactersPtr(string))) { // Must be Unicode string
cf7d2af9 715 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar *)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
9ce05555
A
716 } else {
717 UniChar charBuf[kCFCharConversionBufferLength];
bd5b749c
A
718 CFIndex currentLength;
719 CFIndex usedLen;
720 CFIndex lastUsedLen = 0, lastNumChars = 0;
9ce05555 721 uint32_t result;
cf7d2af9
A
722 uint32_t streamingMask;
723 uint32_t streamID = 0;
9ce05555
A
724#define MAX_DECOMP_LEN (6)
725
726 while (rangeLen > 0) {
727 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
728 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
729
730 // could be in the middle of surrogate pair; back up.
731 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
732
cf7d2af9
A
733 streamingMask = ((rangeLen > currentLength) ? kCFStringEncodingPartialInput : 0)|CFStringEncodingStreamIDToMask(streamID);
734
735 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, buffer, max, &usedLen);
736 streamID = CFStringEncodingStreamIDFromMask(result);
737 result &= ~CFStringEncodingStreamIDMask;
738
739 if (result != kCFStringEncodingConversionSuccess) {
740 if (kCFStringEncodingInvalidInputStream == result) {
741 CFRange composedRange;
742 // Check the tail
743 if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
744 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
745
746 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
747 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
748 streamID = CFStringEncodingStreamIDFromMask(result);
749 result &= ~CFStringEncodingStreamIDMask;
9ce05555 750 }
cf7d2af9
A
751 }
752
753 // Check the head
754 if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
755 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
756
757 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
758 // Try if the composed range can be converted
759 CFStringGetCharacters(string, composedRange, charBuf);
760
761 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
762 CFIndex lastRangeLoc = rangeLoc - lastNumChars;
763
764 currentLength = composedRange.location - lastRangeLoc;
765 CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
766
767 result = CFStringEncodingUnicodeToBytes(encoding, flags|streamingMask, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen);
768 streamID = CFStringEncodingStreamIDFromMask(result);
769 result &= ~CFStringEncodingStreamIDMask;
770
771 if (result == kCFStringEncodingConversionSuccess) { // OK let's try the last run
772 // Looks good. back up
773 totalBytesWritten -= lastUsedLen;
774 numCharsProcessed -= lastNumChars;
775
776 rangeLoc = lastRangeLoc;
777 rangeLen += lastNumChars;
778
779 if (max) {
780 buffer -= lastUsedLen;
781 max += lastUsedLen;
9ce05555
A
782 }
783 }
784 }
785 }
786 }
9ce05555 787 }
cf7d2af9
A
788
789 if (kCFStringEncodingConversionSuccess != result) { // really failed
790 totalBytesWritten += usedLen;
791 numCharsProcessed += numChars;
792 break;
793 }
9ce05555 794 }
cf7d2af9 795
9ce05555
A
796 totalBytesWritten += usedLen;
797 numCharsProcessed += numChars;
798
799 rangeLoc += numChars;
800 rangeLen -= numChars;
801 if (max) {
802 buffer += usedLen;
803 max -= usedLen;
804 if (max <= 0) break;
805 }
806 lastUsedLen = usedLen; lastNumChars = numChars;
807 flags &= ~kCFStringEncodingPrependBOM;
808 }
809 }
810 }
811 if (usedBufLen) *usedBufLen = totalBytesWritten;
812 return numCharsProcessed;
813}
814
d8925383
A
815CFStringRef CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc, const char *buffer) {
816 return CFStringCreateWithCString(alloc, buffer, CFStringFileSystemEncoding());
817}
818
819CFIndex CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string) {
820 CFIndex len = CFStringGetLength(string);
821 CFStringEncoding enc = CFStringGetFastestEncoding(string);
822 switch (enc) {
823 case kCFStringEncodingASCII:
824 case kCFStringEncodingMacRoman:
cf7d2af9
A
825 if (len > (LONG_MAX - 1L) / 3L) return kCFNotFound; // Avoid wrap-around
826 return len * 3L + 1L;
d8925383 827 default:
cf7d2af9
A
828 if (len > (LONG_MAX - 1L) / 9L) return kCFNotFound; // Avoid wrap-around
829 return len * 9L + 1L;
d8925383
A
830 }
831}
832
833Boolean CFStringGetFileSystemRepresentation(CFStringRef string, char *buffer, CFIndex maxBufLen) {
cf7d2af9 834#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
d8925383 835#define MAX_STACK_BUFFER_LEN (255)
9ce05555 836 const UTF16Char *characters = CFStringGetCharactersPtr(string);
bd5b749c 837 const char *bufferLimit = buffer + maxBufLen;
0ae65c4b 838 CFIndex length = CFStringGetLength(string);
bd5b749c 839 CFIndex usedBufLen;
9ce05555 840
0ae65c4b 841 if (maxBufLen < length) return false; // Since we're using UTF-8, the byte length is never shorter than the char length. Also, it filters out 0 == maxBufLen
9ce05555 842
0ae65c4b 843 if (NULL == characters) {
bd5b749c
A
844 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
845 CFRange range = CFRangeMake(0, 0);
846 const char *bytes = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding());
9ce05555 847
bd5b749c
A
848 if (NULL != bytes) {
849 const char *originalBytes = bytes;
850 const char *bytesLimit = bytes + length;
9ce05555 851
bd5b749c 852 while ((bytes < bytesLimit) && (buffer < bufferLimit) && (0 == (*bytes & 0x80))) *(buffer++) = *(bytes++);
9ce05555 853
bd5b749c
A
854 range.location = bytes - originalBytes;
855 }
856 while ((range.location < length) && (buffer < bufferLimit)) {
857 range.length = length - range.location;
858 if (range.length > MAX_STACK_BUFFER_LEN) range.length = MAX_STACK_BUFFER_LEN;
9ce05555 859
bd5b749c
A
860 CFStringGetCharacters(string, range, charactersBuffer);
861 if ((range.length == MAX_STACK_BUFFER_LEN) && CFUniCharIsSurrogateHighCharacter(charactersBuffer[MAX_STACK_BUFFER_LEN - 1])) --range.length; // Backup for a high surrogate
862
863 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, bufferLimit - buffer, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
9ce05555 864
9ce05555 865 buffer += usedBufLen;
bd5b749c 866 range.location += range.length;
9ce05555
A
867 }
868 } else {
0ae65c4b 869 if (!CFUniCharDecompose(characters, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
9ce05555
A
870 buffer += usedBufLen;
871 }
872
bd5b749c 873 if (buffer < bufferLimit) { // Since the filename has its own limit, this is ok for now
9ce05555
A
874 *buffer = '\0';
875 return true;
876 } else {
877 return false;
878 }
cf7d2af9 879#else
9ce05555 880 return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
cf7d2af9 881#endif
9ce05555 882}
d8925383
A
883
884Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
bd5b749c 885 return CFStringGetFileSystemRepresentation(string, (char *)buffer, maxBufLen);
d8925383
A
886}
887
cf7d2af9
A
888
889#if (TARGET_OS_MAC && !(TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)) || (TARGET_OS_EMBEDDED || TARGET_OS_IPHONE)
890
891/* This function is used to obtain users' default script/region code.
892 The function first looks at environment variable __kCFUserEncodingEnvVariableName, then, reads the configuration file in user's home directory.
893*/
894void _CFStringGetUserDefaultEncoding(UInt32 *oScriptValue, UInt32 *oRegionValue) {
895 char *stringValue;
896 char buffer[__kCFMaxDefaultEncodingFileLength];
897 int uid = getuid();
898
899 if ((stringValue = getenv(__kCFUserEncodingEnvVariableName)) != NULL) {
900 if ((uid == strtol_l(stringValue, &stringValue, 0, NULL)) && (':' == *stringValue)) {
901 ++stringValue;
902 } else {
903 stringValue = NULL;
904 }
905 }
906
907 if ((stringValue == NULL) && ((uid > 0) || getenv("HOME"))) {
908 char passwdExtraBuf[1000 + MAXPATHLEN]; // Extra memory buffer for getpwuid_r(); no clue as to how large this should be...
909 struct passwd passwdBuf, *passwdp = NULL;
910
911 switch (getpwuid_r((uid_t)uid, &passwdBuf, passwdExtraBuf, sizeof(passwdExtraBuf), &passwdp)) {
912 case 0: // Success
913 break;
914 case ERANGE: // Somehow we didn't give it enough memory; let the system handle the storage this time; but beware 5778609
915 passwdp = getpwuid((uid_t)uid);
916 break;
917 default:
918 passwdp = NULL;
919 }
920 if (passwdp) {
921 char filename[MAXPATHLEN + 1];
922
923 const char *path = NULL;
924 if (!issetugid()) {
925 path = getenv("CFFIXED_USER_HOME");
926 }
927 if (!path) {
928 path = passwdp->pw_dir;
929 }
930
931 strlcpy(filename, path, sizeof(filename));
932 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
933
934 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
935 int fd = open(filename, O_RDONLY, 0);
936 if (fd == -1) {
937 // Cannot open the file. Let's fallback to smRoman/verUS
938 snprintf(filename, sizeof(filename), "0x%X:0:0", uid);
939 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
940 } else {
941 int readSize;
942 readSize = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
943 buffer[(readSize < 0 ? 0 : readSize)] = '\0';
944 close(fd);
945 stringValue = buffer;
946
947 // Well, we already have a buffer, let's reuse it
948 snprintf(filename, sizeof(filename), "0x%X:%s", uid, buffer);
949 setenv(__kCFUserEncodingEnvVariableName, filename, 1);
950 }
951 if (-1 != no_hang_fd) close(no_hang_fd);
952 }
953 }
954
955 if (stringValue) {
956 *oScriptValue = strtol_l(stringValue, &stringValue, 0, NULL);
957 if (*stringValue == ':') {
958 if (oRegionValue) *oRegionValue = strtol_l(++stringValue, NULL, 0, NULL);
959 return;
960 }
961 }
962
963 // Falling back
964 *oScriptValue = 0; // smRoman
965 if (oRegionValue) *oRegionValue = 0; // verUS
966}
967
968void _CFStringGetInstallationEncodingAndRegion(uint32_t *encoding, uint32_t *region) {
969 char buffer[__kCFMaxDefaultEncodingFileLength];
970 char *stringValue = NULL;
971
972 *encoding = 0;
973 *region = 0;
974
975 struct passwd *passwdp = getpwuid((uid_t)0);
976 if (passwdp) {
977 const char *path = passwdp->pw_dir;
978
979 char filename[MAXPATHLEN + 1];
980 strlcpy(filename, path, sizeof(filename));
981 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
982
983 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
984 int fd = open(filename, O_RDONLY, 0);
985 if (0 <= fd) {
986 size_t size = read(fd, buffer, __kCFMaxDefaultEncodingFileLength - 1);
987 buffer[(size < 0 ? 0 : size)] = '\0';
988 close(fd);
989 stringValue = buffer;
990 }
991 if (-1 != no_hang_fd) close(no_hang_fd);
992 }
993
994 if (stringValue) {
995 *encoding = strtol_l(stringValue, &stringValue, 0, NULL);
996 if (*stringValue == ':') *region = strtol_l(++stringValue, NULL, 0, NULL);
997 }
998}
999
1000Boolean _CFStringSaveUserDefaultEncoding(UInt32 iScriptValue, UInt32 iRegionValue) {
1001 Boolean success = false;
1002 struct passwd *passwdp = getpwuid(getuid());
1003 if (passwdp) {
1004 const char *path = passwdp->pw_dir;
1005 if (!issetugid()) {
1006 char *value = getenv("CFFIXED_USER_HOME");
1007 if (value) path = value; // override
1008 }
1009
1010 char filename[MAXPATHLEN + 1];
1011 strlcpy(filename, path, sizeof(filename));
1012 strlcat(filename, __kCFUserEncodingFileName, sizeof(filename));
1013
1014 int no_hang_fd = __CFProphylacticAutofsAccess ? open("/dev/autofs_nowait", 0) : -1;
1015 (void)unlink(filename);
1016 int fd = open(filename, O_WRONLY|O_CREAT, 0400);
1017 if (0 <= fd) {
1018 char buffer[__kCFMaxDefaultEncodingFileLength];
1019 size_t size = snprintf(buffer, __kCFMaxDefaultEncodingFileLength, "0x%X:0x%X", (unsigned int)iScriptValue, (unsigned int)iRegionValue);
1020 if (size <= __kCFMaxDefaultEncodingFileLength) {
1021 int ret = write(fd, buffer, size);
1022 if (size <= ret) success = true;
1023 }
1024 int save_err = errno;
1025 close(fd);
1026 errno = save_err;
1027 }
1028 int save_err = errno;
1029 if (-1 != no_hang_fd) close(no_hang_fd);
1030 errno = save_err;
1031 }
1032 return success;
1033}
1034
1035#endif
1036