]> git.saurik.com Git - apple/cf.git/blob - String.subproj/CFStringEncodings.c
a82057c44f5e923e24c508db959ea8fcbee0fa00
[apple/cf.git] / String.subproj / CFStringEncodings.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFStringEncodings.c
26 Copyright 1999-2002, Apple, Inc. All rights reserved.
27 Responsibility: Aki Inoue
28 */
29
30 #include "CFInternal.h"
31 #include <CoreFoundation/CFString.h>
32 #include <CoreFoundation/CFByteOrder.h>
33 #include "CFUtilities.h"
34 #include <string.h>
35 #include "CFStringEncodingConverterExt.h"
36 #include "CFUniChar.h"
37 #include "CFUnicodeDecomposition.h"
38
39 static UInt32 __CFWantsToUseASCIICompatibleConversion = (UInt32)-1;
40 CF_INLINE UInt32 __CFGetASCIICompatibleFlag(void) {
41 if (__CFWantsToUseASCIICompatibleConversion == (UInt32)-1) {
42 __CFWantsToUseASCIICompatibleConversion = false;
43 }
44 return (__CFWantsToUseASCIICompatibleConversion ? kCFStringEncodingASCIICompatibleConversion : 0);
45 }
46
47 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag) {
48 __CFWantsToUseASCIICompatibleConversion = (flag ? (UInt32)true : (UInt32)false);
49 }
50
51 Boolean (*__CFCharToUniCharFunc)(UInt32 flags, uint8_t ch, UniChar *unicodeChar) = NULL;
52
53 // To avoid early initialization issues, we just initialize this here
54 // This should not be const as it is changed
55 UniChar __CFCharToUniCharTable[256] = {
56 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
57 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
58 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
59 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
60 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
61 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
62 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
63 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
64 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
65 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
66 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
67 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
68 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
69 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
70 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
71 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
72 };
73
74 void __CFSetCharToUniCharFunc(Boolean (*func)(UInt32 flags, UInt8 ch, UniChar *unicodeChar)) {
75 if (__CFCharToUniCharFunc != func) {
76 int ch;
77 __CFCharToUniCharFunc = func;
78 if (func) {
79 for (ch = 128; ch < 256; ch++) {
80 UniChar uch;
81 __CFCharToUniCharTable[ch] = (__CFCharToUniCharFunc(0, ch, &uch) ? uch : 0xFFFD);
82 }
83 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
84 for (ch = 128; ch < 256; ch++) __CFCharToUniCharTable[ch] = ch;
85 }
86 }
87 }
88
89 __private_extern__ void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars) {
90 CFIndex idx;
91 for (idx = 0; idx < numChars; idx++) buffer[idx] = __CFCharToUniCharTable[bytes[idx]];
92 }
93
94
/* Number of UniChars in the on-stack scratch buffer used when a string has to
   be converted in chunks.
*/
#define kCFCharConversionBufferLength 512


/* Capacity of CFVarWidthCharBuffer's localBuffer, counted in bytes and in
   UniChars. Both macros expect a pointer variable named `buffer` to be in
   scope where they are expanded. */
#define MAX_LOCAL_CHARS (sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS (sizeof(buffer->localBuffer) / sizeof(UniChar))

/* Tells whether UTF-16 data whose first unit, read in host byte order, was BOM
   needs byte swapping. BOM == 0 means "no byte order mark"; such data is
   assumed to be big endian, so it is swapped on little endian hosts only.
   The argument is parenthesized so that any expression can be passed safely. */
#if defined(__BIG_ENDIAN__)
#define SHOULD_SWAP(BOM) ((BOM) == 0xFFFE)
#else
#define SHOULD_SWAP(BOM) ((BOM) != 0xFEFF)
#endif
108
109 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
110 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
111 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
112 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
113 !!! converterFlags is only used for the UTF8 converter at this point
114 */
115 Boolean __CFStringDecodeByteStream2(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr) {
116 return __CFStringDecodeByteStream3(bytes, len, encoding, alwaysUnicode, buffer, useClientsMemoryPtr, 0);
117 }
118
/* States of the NonLossyASCII decoder. The hex and octal states are ranges:
   the decoder increments the mode once per digit consumed and goes back to
   the ASCII state when the matching "final" value is reached. */
enum {
    __NSNonLossyErrorMode = -1,                                       /* malformed input */
    __NSNonLossyASCIIMode = 0,                                        /* plain characters */
    __NSNonLossyBackslashMode = 1,                                    /* just saw a backslash */
    __NSNonLossyHexInitialMode = 2,                                   /* after \u or \U */
    __NSNonLossyHexFinalMode = 4 + __NSNonLossyHexInitialMode,        /* four hex digits read */
    __NSNonLossyOctalInitialMode = 1 + __NSNonLossyHexFinalMode,      /* first octal digit read */
    __NSNonLossyOctalFinalMode = 2 + __NSNonLossyOctalInitialMode     /* two more octal digits read */
};
128
129 Boolean __CFStringDecodeByteStream3(const uint8_t *bytes, UInt32 len, CFStringEncoding encoding, Boolean alwaysUnicode, CFVarWidthCharBuffer *buffer, Boolean *useClientsMemoryPtr, UInt32 converterFlags) {
130 UInt32 idx;
131 const UniChar *uniChars = (const UniChar *)bytes;
132 const uint8_t *chars = (const uint8_t *)bytes;
133 const uint8_t *end = chars + len;
134 uint16_t bom;
135 Boolean allASCII = false;
136
137 if (useClientsMemoryPtr) *useClientsMemoryPtr = false;
138
139 buffer->isASCII = !alwaysUnicode;
140 buffer->shouldFreeChars = false;
141 buffer->numChars = 0;
142 if (0 == len) return true;
143
144 buffer->allocator = (buffer->allocator ? buffer->allocator : __CFGetDefaultAllocator());
145 switch (encoding) {
146 case kCFStringEncodingUnicode:
147 bom = (*uniChars == 0xfffe || *uniChars == 0xfeff) ? (*uniChars++) : 0;
148 /* If the byte order mark is missing, we assume big endian... */
149 len = len / 2 - (0 == bom ? 0 : 1);
150
151 if (buffer->isASCII) { // Let's see if we can reduce the Unicode down to ASCII...
152 if (SHOULD_SWAP(bom)) {
153 for (idx = 0; idx < len; idx++) if ((uniChars[idx] & 0x80ff) != 0) {buffer->isASCII = false; break;}
154 } else {
155 for (idx = 0; idx < len; idx++) if (uniChars[idx] > 127) {buffer->isASCII = false; break;}
156 }
157 }
158
159 if (buffer->isASCII) {
160 buffer->numChars = len;
161 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
162 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
163 if (SHOULD_SWAP(bom)) { // !!! Can be somewhat trickier here and use a single loop with a properly inited ptr
164 for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = (uniChars[idx] >> 8);
165 } else {
166 for (idx = 0; idx < len; idx++) buffer->chars.ascii[idx] = uniChars[idx];
167 }
168 } else {
169 buffer->numChars = len;
170 if (useClientsMemoryPtr && (bom == 0) && !SHOULD_SWAP(bom)) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
171 *useClientsMemoryPtr = true;
172 buffer->shouldFreeChars = false;
173 buffer->chars.unicode = (UniChar *)bytes;
174 } else {
175 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
176 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
177 if (SHOULD_SWAP(bom)) {
178 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = CFSwapInt16(uniChars[idx]);
179 } else {
180 memmove(buffer->chars.unicode, uniChars, len * sizeof(UniChar));
181 }
182 }
183 }
184 return true;
185
186 case kCFStringEncodingNonLossyASCII: {
187 UTF16Char currentValue = 0;
188 uint8_t character;
189 int8_t mode = __NSNonLossyASCIIMode;
190
191 buffer->isASCII = false;
192 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
193 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
194 buffer->numChars = 0;
195
196 while (chars < end) {
197 character = (*chars++);
198
199 switch (mode) {
200 case __NSNonLossyASCIIMode:
201 if (character == '\\') {
202 mode = __NSNonLossyBackslashMode;
203 } else if (character < 0x80) {
204 currentValue = character;
205 } else {
206 mode = __NSNonLossyErrorMode;
207 }
208 break;
209
210 case __NSNonLossyBackslashMode:
211 if ((character == 'U') || (character == 'u')) {
212 mode = __NSNonLossyHexInitialMode;
213 currentValue = 0;
214 } else if ((character >= '0') && (character <= '9')) {
215 mode = __NSNonLossyOctalInitialMode;
216 currentValue = character - '0';
217 } else if (character == '\\') {
218 mode = __NSNonLossyASCIIMode;
219 currentValue = character;
220 } else {
221 mode = __NSNonLossyErrorMode;
222 }
223 break;
224
225 default:
226 if (mode < __NSNonLossyHexFinalMode) {
227 if ((character >= '0') && (character <= '9')) {
228 currentValue = (currentValue << 4) | (character - '0');
229 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
230 } else {
231 if (character >= 'a') character -= ('a' - 'A');
232 if ((character >= 'A') && (character <= 'F')) {
233 currentValue = (currentValue << 4) | ((character - 'A') + 10);
234 if (++mode == __NSNonLossyHexFinalMode) mode = __NSNonLossyASCIIMode;
235 } else {
236 mode = __NSNonLossyErrorMode;
237 }
238 }
239 } else {
240 if ((character >= '0') && (character <= '9')) {
241 currentValue = (currentValue << 3) | (character - '0');
242 if (++mode == __NSNonLossyOctalFinalMode) mode = __NSNonLossyASCIIMode;
243 } else {
244 mode = __NSNonLossyErrorMode;
245 }
246 }
247 break;
248 }
249
250 if (mode == __NSNonLossyASCIIMode) {
251 buffer->chars.unicode[buffer->numChars++] = currentValue;
252 } else if (mode == __NSNonLossyErrorMode) {
253 return false;
254 }
255 }
256 return (mode == __NSNonLossyASCIIMode);
257 }
258
259 case kCFStringEncodingUTF8:
260 if ((len >= 3) && (chars[0] == 0xef) && (chars[1] == 0xbb) && (chars[2] == 0xbf)) { // If UTF8 BOM, skip
261 chars += 3;
262 len -= 3;
263 if (0 == len) return true;
264 }
265 allASCII = !alwaysUnicode;
266 if (allASCII) {
267 for (idx = 0; idx < len; idx++) {
268 if (128 <= chars[idx]) {
269 allASCII = false;
270 break;
271 }
272 }
273 }
274 buffer->isASCII = allASCII;
275 if (allASCII) {
276 buffer->numChars = len;
277 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
278 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
279 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
280 } else {
281 UInt32 numDone;
282 static CFStringEncodingToUnicodeProc __CFFromUTF8 = NULL;
283
284 if (!__CFFromUTF8) {
285 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
286 __CFFromUTF8 = (CFStringEncodingToUnicodeProc)converter->toUnicode;
287 }
288
289 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
290 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
291 buffer->numChars = 0;
292 while (chars < end) {
293 numDone = 0;
294 chars += __CFFromUTF8(converterFlags, chars, end - chars, &(buffer->chars.unicode[buffer->numChars]), len - buffer->numChars, &numDone);
295
296 if (0 == numDone) {
297 if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
298 buffer->isASCII = !alwaysUnicode;
299 buffer->shouldFreeChars = false;
300 buffer->chars.ascii = NULL;
301 buffer->numChars = 0;
302 return false;
303 }
304 buffer->numChars += numDone;
305 }
306 }
307 return true;
308
309 default:
310 if (CFStringEncodingIsValidEncoding(encoding)) {
311 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(encoding);
312 Boolean isASCIISuperset = __CFStringEncodingIsSupersetOfASCII(encoding);
313
314 if (!converter) return false;
315
316 if (converter->encodingClass == kCFStringEncodingConverterCheapEightBit) {
317 allASCII = !alwaysUnicode && isASCIISuperset;
318 if (allASCII) {
319 for (idx = 0; idx < len; idx++) {
320 if (128 <= chars[idx]) {
321 allASCII = false;
322 break;
323 }
324 }
325 }
326 buffer->isASCII = allASCII;
327 if (allASCII) {
328 buffer->numChars = len;
329 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
330 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
331 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
332 } else {
333 buffer->shouldFreeChars = !buffer->chars.unicode && (len <= MAX_LOCAL_UNICHARS) ? false : true;
334 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (len <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(UniChar), 0));
335 buffer->numChars = len;
336 if (kCFStringEncodingASCII == encoding || kCFStringEncodingISOLatin1 == encoding) {
337 for (idx = 0; idx < len; idx++) buffer->chars.unicode[idx] = (UniChar)chars[idx];
338 } else {
339 for (idx = 0; idx < len; idx++)
340 if (chars[idx] < 0x80 && isASCIISuperset)
341 buffer->chars.unicode[idx] = (UniChar)chars[idx];
342 else if (!((CFStringEncodingCheapEightBitToUnicodeProc)converter->toUnicode)(0, chars[idx], buffer->chars.unicode + idx))
343 return false;
344 }
345 }
346 return true;
347 } else {
348 allASCII = !alwaysUnicode && isASCIISuperset;
349 if (allASCII) {
350 for (idx = 0; idx < len; idx++)
351 if (128 <= chars[idx]) {
352 allASCII = false;
353 break;
354 }
355 }
356 buffer->isASCII = allASCII;
357 if (allASCII) {
358 buffer->numChars = len;
359 buffer->shouldFreeChars = !buffer->chars.ascii && (len <= MAX_LOCAL_CHARS) ? false : true;
360 buffer->chars.ascii = (buffer->chars.ascii ? buffer->chars.ascii : (len <= MAX_LOCAL_CHARS) ? (uint8_t *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, len * sizeof(uint8_t), 0));
361 memmove(buffer->chars.ascii, chars, len * sizeof(uint8_t));
362 } else {
363 UInt32 guessedLength = CFStringEncodingCharLengthForBytes(encoding, 0, bytes, len);
364 static UInt32 lossyFlag = (UInt32)-1;
365
366 buffer->shouldFreeChars = !buffer->chars.unicode && (guessedLength <= MAX_LOCAL_UNICHARS) ? false : true;
367 buffer->chars.unicode = (buffer->chars.unicode ? buffer->chars.unicode : (guessedLength <= MAX_LOCAL_UNICHARS) ? (UniChar *)buffer->localBuffer : CFAllocatorAllocate(buffer->allocator, guessedLength * sizeof(UniChar), 0));
368
369 if (lossyFlag == (UInt32)-1) lossyFlag = (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? 0 : kCFStringEncodingAllowLossyConversion);
370
371 if (CFStringEncodingBytesToUnicode(encoding, lossyFlag|__CFGetASCIICompatibleFlag(), bytes, len, NULL, buffer->chars.unicode, (guessedLength > MAX_LOCAL_UNICHARS ? guessedLength : MAX_LOCAL_UNICHARS), &(buffer->numChars))) {
372 if (buffer->shouldFreeChars) CFAllocatorDeallocate(buffer->allocator, buffer->chars.unicode);
373 buffer->isASCII = !alwaysUnicode;
374 buffer->shouldFreeChars = false;
375 buffer->chars.ascii = NULL;
376 buffer->numChars = 0;
377 return false;
378 }
379 }
380 return true;
381 }
382 } else {
383 return false;
384 }
385 }
386 }
387
388
389 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
390 into a fixed size buffer. Returns number of characters converted.
391 Characters that cannot be converted to the specified encoding are represented
392 with the char specified by lossByte; if 0, then lossy conversion is not allowed
393 and conversion stops, returning partial results.
394 Pass buffer==NULL if you don't care about the converted string (but just the convertability,
395 or number of bytes required, indicated by usedBufLen).
396 Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
397
398 Note: This function is intended to work through CFString functions, so it should work
399 with NSStrings as well as CFStrings.
400 */
401 CFIndex __CFStringEncodeByteStream(CFStringRef string, CFIndex rangeLoc, CFIndex rangeLen, Boolean generatingExternalFile, CFStringEncoding encoding, char lossByte, uint8_t *buffer, CFIndex max, CFIndex *usedBufLen) {
402 CFIndex totalBytesWritten = 0; /* Number of written bytes */
403 CFIndex numCharsProcessed = 0; /* Number of processed chars */
404 const UniChar *unichars;
405
406 if (encoding == kCFStringEncodingUTF8 && (unichars = CFStringGetCharactersPtr(string))) {
407 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
408
409 if (!__CFToUTF8) {
410 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
411 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
412 }
413 numCharsProcessed = __CFToUTF8((generatingExternalFile ? kCFStringEncodingPrependBOM : 0), unichars + rangeLoc, rangeLen, buffer, (buffer ? max : 0), &totalBytesWritten);
414
415 } else if (encoding == kCFStringEncodingNonLossyASCII) {
416 const char *hex = "0123456789abcdef";
417 UniChar ch;
418 CFStringInlineBuffer buf;
419 CFStringInitInlineBuffer(string, &buf, CFRangeMake(rangeLoc, rangeLen));
420 while (numCharsProcessed < rangeLen) {
421 CFIndex reqLength; /* Required number of chars to encode this UniChar */
422 CFIndex cnt;
423 char tmp[6];
424 ch = CFStringGetCharacterFromInlineBuffer(&buf, numCharsProcessed);
425 if ((ch >= ' ' && ch <= '~' && ch != '\\') || (ch == '\n' || ch == '\r' || ch == '\t')) {
426 reqLength = 1;
427 tmp[0] = ch;
428 } else {
429 if (ch == '\\') {
430 tmp[1] = '\\';
431 reqLength = 2;
432 } else if (ch < 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
433 tmp[1] = '0' + (ch >> 6);
434 tmp[2] = '0' + ((ch >> 3) & 7);
435 tmp[3] = '0' + (ch & 7);
436 reqLength = 4;
437 } else { /* \Unnnn */
438 tmp[1] = 'u'; // Changed to small+u in order to be aligned with Java
439 tmp[2] = hex[(ch >> 12) & 0x0f];
440 tmp[3] = hex[(ch >> 8) & 0x0f];
441 tmp[4] = hex[(ch >> 4) & 0x0f];
442 tmp[5] = hex[ch & 0x0f];
443 reqLength = 6;
444 }
445 tmp[0] = '\\';
446 }
447 if (buffer) {
448 if (totalBytesWritten + reqLength > max) break; /* Doesn't fit..
449 .*/
450 for (cnt = 0; cnt < reqLength; cnt++) {
451 buffer[totalBytesWritten + cnt] = tmp[cnt];
452 }
453 }
454 totalBytesWritten += reqLength;
455 numCharsProcessed++;
456 }
457 } else if (encoding == kCFStringEncodingUnicode) {
458 CFIndex extraForBOM = generatingExternalFile ? sizeof(UniChar) : 0;
459 numCharsProcessed = rangeLen;
460 if (buffer && (numCharsProcessed * (CFIndex)sizeof(UniChar) + extraForBOM > max)) {
461 numCharsProcessed = (max > extraForBOM) ? ((max - extraForBOM) / sizeof(UniChar)) : 0;
462 }
463 totalBytesWritten = (numCharsProcessed * sizeof(UniChar)) + extraForBOM;
464 if (buffer) {
465 if (generatingExternalFile) { /* Generate BOM */
466 #if defined(__BIG_ENDIAN__)
467 *buffer++ = 0xfe; *buffer++ = 0xff;
468 #else
469 *buffer++ = 0xff; *buffer++ = 0xfe;
470 #endif
471 }
472 CFStringGetCharacters(string, CFRangeMake(rangeLoc, numCharsProcessed), (UniChar *)buffer);
473 }
474 } else {
475 CFIndex numChars;
476 UInt32 flags;
477 const unsigned char *cString = NULL;
478
479 if (!CF_IS_OBJC(CFStringGetTypeID(), string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // Checking for NSString to avoid infinite recursion
480 const unsigned char *ptr;
481 if ((cString = CFStringGetCStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
482 ptr = (cString += rangeLoc);
483 if (__CFStringGetEightBitStringEncoding() == encoding) {
484 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
485 if (buffer) memmove(buffer, cString, numCharsProcessed);
486 if (usedBufLen) *usedBufLen = numCharsProcessed;
487 return numCharsProcessed;
488 }
489 while (*ptr < 0x80 && rangeLen > 0) {
490 ++ptr;
491 --rangeLen;
492 }
493 numCharsProcessed = ptr - cString;
494 if (buffer) {
495 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
496 memmove(buffer, cString, numCharsProcessed);
497 buffer += numCharsProcessed;
498 max -= numCharsProcessed;
499 }
500 if (!rangeLen || (buffer && (max == 0))) {
501 if (usedBufLen) *usedBufLen = numCharsProcessed;
502 return numCharsProcessed;
503 }
504 rangeLoc += numCharsProcessed;
505 totalBytesWritten += numCharsProcessed;
506 }
507 if (!cString && (cString = CFStringGetPascalStringPtr(string, __CFStringGetEightBitStringEncoding()))) {
508 ptr = (cString += (rangeLoc + 1));
509 if (__CFStringGetEightBitStringEncoding() == encoding) {
510 numCharsProcessed = (rangeLen < max || buffer == NULL ? rangeLen : max);
511 if (buffer) memmove(buffer, cString, numCharsProcessed);
512 if (usedBufLen) *usedBufLen = numCharsProcessed;
513 return numCharsProcessed;
514 }
515 while (*ptr < 0x80 && rangeLen > 0) {
516 ++ptr;
517 --rangeLen;
518 }
519 numCharsProcessed = ptr - cString;
520 if (buffer) {
521 numCharsProcessed = (numCharsProcessed < max ? numCharsProcessed : max);
522 memmove(buffer, cString, numCharsProcessed);
523 buffer += numCharsProcessed;
524 max -= numCharsProcessed;
525 }
526 if (!rangeLen || (buffer && (max == 0))) {
527 if (usedBufLen) *usedBufLen = numCharsProcessed;
528 return numCharsProcessed;
529 }
530 rangeLoc += numCharsProcessed;
531 totalBytesWritten += numCharsProcessed;
532 }
533 }
534
535 if (!buffer) max = 0;
536
537 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
538 flags = (lossByte ? ((unsigned char)lossByte == 0xFF && encoding == kCFStringEncodingASCII ? kCFStringEncodingAllowLossyConversion : CFStringEncodingLossyByteToMask(lossByte)) : 0) | (generatingExternalFile ? kCFStringEncodingPrependBOM : 0) | __CFGetASCIICompatibleFlag();
539
540 if (!cString && (cString = (const char*)CFStringGetCharactersPtr(string))) { // Must be Unicode string
541 if (CFStringEncodingIsValidEncoding(encoding)) { // Converter available in CF
542 CFStringEncodingUnicodeToBytes(encoding, flags, (const UniChar*)cString + rangeLoc, rangeLen, &numCharsProcessed, buffer, max, &totalBytesWritten);
543 } else {
544 return 0;
545 }
546 } else {
547 UniChar charBuf[kCFCharConversionBufferLength];
548 UInt32 currentLength;
549 UInt32 usedLen;
550 uint32_t lastUsedLen = 0, lastNumChars = 0;
551 uint32_t result;
552 Boolean isCFBuiltin = CFStringEncodingIsValidEncoding(encoding);
553 #define MAX_DECOMP_LEN (6)
554
555 while (rangeLen > 0) {
556 currentLength = (rangeLen > kCFCharConversionBufferLength ? kCFCharConversionBufferLength : rangeLen);
557 CFStringGetCharacters(string, CFRangeMake(rangeLoc, currentLength), charBuf);
558
559 // could be in the middle of surrogate pair; back up.
560 if ((rangeLen > kCFCharConversionBufferLength) && CFUniCharIsSurrogateHighCharacter(charBuf[kCFCharConversionBufferLength - 1])) --currentLength;
561
562 if (isCFBuiltin) { // Converter available in CF
563 if ((result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, currentLength, &numChars, buffer, max, &usedLen)) != kCFStringEncodingConversionSuccess) {
564 if (kCFStringEncodingInvalidInputStream == result) {
565 CFRange composedRange;
566 // Check the tail
567 if ((rangeLen > kCFCharConversionBufferLength) && ((currentLength - numChars) < MAX_DECOMP_LEN)) {
568 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc + currentLength);
569
570 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < (rangeLoc + numChars))) {
571 result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.location - rangeLoc, &numChars, buffer, max, &usedLen);
572 }
573 }
574
575 // Check the head
576 if ((kCFStringEncodingConversionSuccess != result) && (lastNumChars > 0) && (numChars < MAX_DECOMP_LEN)) {
577 composedRange = CFStringGetRangeOfComposedCharactersAtIndex(string, rangeLoc);
578
579 if ((composedRange.length <= MAX_DECOMP_LEN) && (composedRange.location < rangeLoc)) {
580 // Try if the composed range can be converted
581 CFStringGetCharacters(string, composedRange, charBuf);
582
583 if (CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, composedRange.length, &numChars, NULL, 0, &usedLen) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
584 CFIndex lastRangeLoc = rangeLoc - lastNumChars;
585
586 currentLength = composedRange.location - lastRangeLoc;
587 CFStringGetCharacters(string, CFRangeMake(lastRangeLoc, currentLength), charBuf);
588
589 if ((result = CFStringEncodingUnicodeToBytes(encoding, flags, charBuf, currentLength, &numChars, (max ? buffer - lastUsedLen : NULL), (max ? max + lastUsedLen : 0), &usedLen)) == kCFStringEncodingConversionSuccess) { // OK let's try the last run
590 // Looks good. back up
591 totalBytesWritten -= lastUsedLen;
592 numCharsProcessed -= lastNumChars;
593
594 rangeLoc = lastRangeLoc;
595 rangeLen += lastNumChars;
596
597 if (max) {
598 buffer -= lastUsedLen;
599 max += lastUsedLen;
600 }
601 }
602 }
603 }
604 }
605 }
606
607 if (kCFStringEncodingConversionSuccess != result) { // really failed
608 totalBytesWritten += usedLen;
609 numCharsProcessed += numChars;
610 break;
611 }
612 }
613 } else {
614 return 0;
615 }
616
617 totalBytesWritten += usedLen;
618 numCharsProcessed += numChars;
619
620 rangeLoc += numChars;
621 rangeLen -= numChars;
622 if (max) {
623 buffer += usedLen;
624 max -= usedLen;
625 if (max <= 0) break;
626 }
627 lastUsedLen = usedLen; lastNumChars = numChars;
628 flags &= ~kCFStringEncodingPrependBOM;
629 }
630 }
631 }
632 if (usedBufLen) *usedBufLen = totalBytesWritten;
633 return numCharsProcessed;
634 }
635
636 #define MAX_STACK_BUFFER_LEN (255)
637 CF_EXPORT Boolean _CFStringGetFileSystemRepresentation(CFStringRef string, uint8_t *buffer, CFIndex maxBufLen) {
638 #if defined(__MACH__)
639 const UTF16Char *characters = CFStringGetCharactersPtr(string);
640 uint32_t usedBufLen;
641
642 if (NULL == characters) {
643 CFIndex length = CFStringGetLength(string);
644
645 if (length > MAX_STACK_BUFFER_LEN) {
646 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN];
647 CFRange range = CFRangeMake(0, MAX_STACK_BUFFER_LEN);
648 uint32_t localUsedBufLen;
649
650 usedBufLen = 0;
651
652 while (length > 0) {
653 CFStringGetCharacters(string, range, charactersBuffer);
654 if (CFUniCharIsSurrogateHighCharacter(charactersBuffer[range.length - 1])) --range.length; // Backup for a high surrogate
655
656 if (!CFUniCharDecompose(charactersBuffer, range.length, NULL, (void *)buffer, maxBufLen - usedBufLen, &localUsedBufLen, true, kCFUniCharUTF8Format, true)) return false;
657 buffer += localUsedBufLen;
658 usedBufLen += localUsedBufLen;
659
660 length -= range.length;
661 range.location += range.length;
662 range.length = (length < MAX_STACK_BUFFER_LEN ? length : MAX_STACK_BUFFER_LEN);
663 }
664 } else {
665 UTF16Char charactersBuffer[MAX_STACK_BUFFER_LEN]; // C99 Variable array
666
667 CFStringGetCharacters(string, CFRangeMake(0, length), charactersBuffer);
668 if (!CFUniCharDecompose(charactersBuffer, length, NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
669 buffer += usedBufLen;
670 }
671 } else {
672 if (!CFUniCharDecompose(characters, CFStringGetLength(string), NULL, (void *)buffer, maxBufLen, &usedBufLen, true, kCFUniCharUTF8Format, true)) return false;
673 buffer += usedBufLen;
674 }
675
676 if (usedBufLen < (uint32_t)maxBufLen) { // Since the filename has its own limit, this is ok for now
677 *buffer = '\0';
678 return true;
679 } else {
680 return false;
681 }
682 #else __MACH__
683 return CFStringGetCString(string, buffer, maxBufLen, CFStringFileSystemEncoding());
684 #endif __MACH__
685 }