2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
/*	CFStringEncodings.c
	Copyright 1999-2002, Apple, Inc. All rights reserved.
	Responsibility: Aki Inoue
*/
30 #include "CFInternal.h"
31 #include <CoreFoundation/CFString.h>
32 #include <CoreFoundation/CFByteOrder.h>
33 #include "CFUtilities.h"
35 #include "CFStringEncodingConverterExt.h"
36 #include "CFUniChar.h"
37 #include "CFUnicodeDecomposition.h"
39 static UInt32 __CFWantsToUseASCIICompatibleConversion
= (UInt32
)-1;
40 CF_INLINE UInt32
__CFGetASCIICompatibleFlag(void) {
41 if (__CFWantsToUseASCIICompatibleConversion
== (UInt32
)-1) {
42 __CFWantsToUseASCIICompatibleConversion
= false;
44 return (__CFWantsToUseASCIICompatibleConversion
? kCFStringEncodingASCIICompatibleConversion
: 0);
47 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag
) {
48 __CFWantsToUseASCIICompatibleConversion
= (flag
? (UInt32
)true : (UInt32
)false);
51 Boolean (*__CFCharToUniCharFunc
)(UInt32 flags
, uint8_t ch
, UniChar
*unicodeChar
) = NULL
;
53 // To avoid early initialization issues, we just initialize this here
54 // This should not be const as it is changed
55 UniChar __CFCharToUniCharTable
[256] = {
56 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
57 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
58 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
59 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
60 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
61 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
62 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
63 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
64 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
65 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
66 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
67 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
68 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
69 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
70 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
71 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
74 void __CFSetCharToUniCharFunc(Boolean (*func
)(UInt32 flags
, UInt8 ch
, UniChar
*unicodeChar
)) {
75 if (__CFCharToUniCharFunc
!= func
) {
77 __CFCharToUniCharFunc
= func
;
79 for (ch
= 128; ch
< 256; ch
++) {
81 __CFCharToUniCharTable
[ch
] = (__CFCharToUniCharFunc(0, ch
, &uch
) ? uch
: 0xFFFD);
83 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
84 for (ch
= 128; ch
< 256; ch
++) __CFCharToUniCharTable
[ch
] = ch
;
89 __private_extern__
void __CFStrConvertBytesToUnicode(const uint8_t *bytes
, UniChar
*buffer
, CFIndex numChars
) {
91 for (idx
= 0; idx
< numChars
; idx
++) buffer
[idx
] = __CFCharToUniCharTable
[bytes
[idx
]];
/* The minimum length the output buffers should be in the above functions
*/
#define kCFCharConversionBufferLength 512

/* Capacity of a CFVarWidthCharBuffer's inline storage, in 8-bit characters and in UniChars.
   Both expect a variable named "buffer" to be in scope at the point of use. */
#define MAX_LOCAL_CHARS		(sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS	(sizeof(buffer->localBuffer) / sizeof(UniChar))

/* True when 16-bit units read from the stream must be byte swapped. BOM is the first unit as
   read on this host, or 0 when the stream had none; a stream without a BOM is taken to be
   big endian, so it is swapped only on little-endian hosts. */
#if defined(__BIG_ENDIAN__)
#define SHOULD_SWAP(BOM) ((BOM) == 0xFFFE)
#else
#define SHOULD_SWAP(BOM) ((BOM) != 0xFEFF)
#endif
109 /* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. false return indicates an error occured during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
110 9/18/98 __CFStringDecodeByteStream now avoids to allocate buffer if buffer->chars is not NULL
111 Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
112 __CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
113 !!! converterFlags is only used for the UTF8 converter at this point
115 Boolean
__CFStringDecodeByteStream2(const uint8_t *bytes
, UInt32 len
, CFStringEncoding encoding
, Boolean alwaysUnicode
, CFVarWidthCharBuffer
*buffer
, Boolean
*useClientsMemoryPtr
) {
116 return __CFStringDecodeByteStream3(bytes
, len
, encoding
, alwaysUnicode
, buffer
, useClientsMemoryPtr
, 0);
/* Parser states used while decoding kCFStringEncodingNonLossyASCII.
   The hex and octal states are ranges: the decoder increments the mode once per digit and
   returns to __NSNonLossyASCIIMode when the matching "Final" value is reached
   (four hex digits after \u, three octal digits in total for \ddd). */
enum {
    __NSNonLossyErrorMode = -1,
    __NSNonLossyASCIIMode = 0,
    __NSNonLossyBackslashMode = 1,
    __NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,
    __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,
    __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,
    __NSNonLossyOctalFinalMode = __NSNonLossyHexFinalMode + 3
};
// __CFStringDecodeByteStream3: decodes len bytes of the given encoding into buffer, either as
// 8-bit data (buffer->chars.ascii, isASCII true) or as UniChars (buffer->chars.unicode).
//   alwaysUnicode       - when true the 8-bit result form is never chosen.
//   useClientsMemoryPtr - optional; cleared on entry, set to true only on the UTF-16 path when
//                         the input bytes themselves are handed back in buffer->chars.unicode.
//   converterFlags      - passed to the UTF-8 converter only.
// On entry shouldFreeChars and numChars are reset; len == 0 returns true immediately.
// Storage: a non-NULL buffer->chars pointer supplied by the caller is used as is; otherwise
// buffer->localBuffer is used when the data fits, else memory is taken from buffer->allocator
// (the default allocator when none was set).
// NOTE(review): this listing has lost short lines (declarations of idx/bom/character/numDone,
// the switch statements, several else/break/closing-brace lines). Confirm against a pristine
// copy of the file before changing any logic here.
// NOTE(review): "!p && (len <= MAX) ? false : true" parses as "(!p && len <= MAX) ? false : true",
// so shouldFreeChars ends up true whenever the caller supplied its own chars pointer.
// Confirm that callers expect this.
129 Boolean
__CFStringDecodeByteStream3(const uint8_t *bytes
, UInt32 len
, CFStringEncoding encoding
, Boolean alwaysUnicode
, CFVarWidthCharBuffer
*buffer
, Boolean
*useClientsMemoryPtr
, UInt32 converterFlags
) {
131 const UniChar
*uniChars
= (const UniChar
*)bytes
;
132 const uint8_t *chars
= (const uint8_t *)bytes
;
133 const uint8_t *end
= chars
+ len
;
135 Boolean allASCII
= false;
137 if (useClientsMemoryPtr
) *useClientsMemoryPtr
= false;
139 buffer
->isASCII
= !alwaysUnicode
;
140 buffer
->shouldFreeChars
= false;
141 buffer
->numChars
= 0;
142 if (0 == len
) return true;
144 buffer
->allocator
= (buffer
->allocator
? buffer
->allocator
: __CFGetDefaultAllocator());
// UTF-16 input: a leading 0xfffe / 0xfeff unit is consumed as the BOM, then len is converted
// from a byte count to a UniChar count. If an 8-bit result is still allowed, the data is
// scanned to see whether every unit is 7-bit; if so it is narrowed, otherwise it is copied
// (byte swapped when SHOULD_SWAP says so) or, with no BOM and no swap, returned in place.
146 case kCFStringEncodingUnicode
:
147 bom
= (*uniChars
== 0xfffe || *uniChars
== 0xfeff) ? (*uniChars
++) : 0;
148 /* If the byte order mark is missing, we assume big endian... */
149 len
= len
/ 2 - (0 == bom
? 0 : 1);
151 if (buffer
->isASCII
) { // Let's see if we can reduce the Unicode down to ASCII...
152 if (SHOULD_SWAP(bom
)) {
153 for (idx
= 0; idx
< len
; idx
++) if ((uniChars
[idx
] & 0x80ff) != 0) {buffer
->isASCII
= false; break;}
155 for (idx
= 0; idx
< len
; idx
++) if (uniChars
[idx
] > 127) {buffer
->isASCII
= false; break;}
159 if (buffer
->isASCII
) {
160 buffer
->numChars
= len
;
161 buffer
->shouldFreeChars
= !buffer
->chars
.ascii
&& (len
<= MAX_LOCAL_CHARS
) ? false : true;
162 buffer
->chars
.ascii
= (buffer
->chars
.ascii
? buffer
->chars
.ascii
: (len
<= MAX_LOCAL_CHARS
) ? (uint8_t *)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(uint8_t), 0));
163 if (SHOULD_SWAP(bom
)) { // !!! Can be somewhat trickier here and use a single loop with a properly inited ptr
164 for (idx
= 0; idx
< len
; idx
++) buffer
->chars
.ascii
[idx
] = (uniChars
[idx
] >> 8);
166 for (idx
= 0; idx
< len
; idx
++) buffer
->chars
.ascii
[idx
] = uniChars
[idx
];
169 buffer
->numChars
= len
;
170 if (useClientsMemoryPtr
&& (bom
== 0) && !SHOULD_SWAP(bom
)) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
171 *useClientsMemoryPtr
= true;
172 buffer
->shouldFreeChars
= false;
173 buffer
->chars
.unicode
= (UniChar
*)bytes
;
175 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (len
<= MAX_LOCAL_UNICHARS
) ? false : true;
176 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (len
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(UniChar
), 0));
177 if (SHOULD_SWAP(bom
)) {
178 for (idx
= 0; idx
< len
; idx
++) buffer
->chars
.unicode
[idx
] = CFSwapInt16(uniChars
[idx
]);
180 memmove(buffer
->chars
.unicode
, uniChars
, len
* sizeof(UniChar
));
// Non-lossy ASCII input: always produces UniChars. "mode" drives a small state machine that
// recognises a doubled backslash, backslash + u/U + four hex digits, and backslash + three
// digits; each completed value is appended to buffer->chars.unicode. The result is true only
// if the input ends in __NSNonLossyASCIIMode, i.e. not in the middle of an escape.
// NOTE(review): the digit tests for the three-digit form accept '8' and '9' as well as the
// octal digits. Confirm whether that leniency is intended.
186 case kCFStringEncodingNonLossyASCII
: {
187 UTF16Char currentValue
= 0;
189 int8_t mode
= __NSNonLossyASCIIMode
;
191 buffer
->isASCII
= false;
192 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (len
<= MAX_LOCAL_UNICHARS
) ? false : true;
193 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (len
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(UniChar
), 0));
194 buffer
->numChars
= 0;
196 while (chars
< end
) {
197 character
= (*chars
++);
200 case __NSNonLossyASCIIMode
:
201 if (character
== '\\') {
202 mode
= __NSNonLossyBackslashMode
;
203 } else if (character
< 0x80) {
204 currentValue
= character
;
206 mode
= __NSNonLossyErrorMode
;
210 case __NSNonLossyBackslashMode
:
211 if ((character
== 'U') || (character
== 'u')) {
212 mode
= __NSNonLossyHexInitialMode
;
214 } else if ((character
>= '0') && (character
<= '9')) {
215 mode
= __NSNonLossyOctalInitialMode
;
216 currentValue
= character
- '0';
217 } else if (character
== '\\') {
218 mode
= __NSNonLossyASCIIMode
;
219 currentValue
= character
;
221 mode
= __NSNonLossyErrorMode
;
226 if (mode
< __NSNonLossyHexFinalMode
) {
227 if ((character
>= '0') && (character
<= '9')) {
228 currentValue
= (currentValue
<< 4) | (character
- '0');
229 if (++mode
== __NSNonLossyHexFinalMode
) mode
= __NSNonLossyASCIIMode
;
231 if (character
>= 'a') character
-= ('a' - 'A');
232 if ((character
>= 'A') && (character
<= 'F')) {
233 currentValue
= (currentValue
<< 4) | ((character
- 'A') + 10);
234 if (++mode
== __NSNonLossyHexFinalMode
) mode
= __NSNonLossyASCIIMode
;
236 mode
= __NSNonLossyErrorMode
;
240 if ((character
>= '0') && (character
<= '9')) {
241 currentValue
= (currentValue
<< 3) | (character
- '0');
242 if (++mode
== __NSNonLossyOctalFinalMode
) mode
= __NSNonLossyASCIIMode
;
244 mode
= __NSNonLossyErrorMode
;
250 if (mode
== __NSNonLossyASCIIMode
) {
251 buffer
->chars
.unicode
[buffer
->numChars
++] = currentValue
;
252 } else if (mode
== __NSNonLossyErrorMode
) {
256 return (mode
== __NSNonLossyASCIIMode
);
// UTF-8 input: a leading EF BB BF signature is detected first. If an 8-bit result is allowed
// the bytes are scanned for values >= 128; all-7-bit input is copied unchanged. Otherwise the
// UTF-8 converter's toUnicode proc (cached in the static __CFFromUTF8) is called repeatedly
// until "chars" reaches "end".
// NOTE(review): the condition guarding the cleanup that frees the buffer and resets its
// fields is among the lost lines; presumably it is a converter failure. Verify.
259 case kCFStringEncodingUTF8
:
260 if ((len
>= 3) && (chars
[0] == 0xef) && (chars
[1] == 0xbb) && (chars
[2] == 0xbf)) { // If UTF8 BOM, skip
263 if (0 == len
) return true;
265 allASCII
= !alwaysUnicode
;
267 for (idx
= 0; idx
< len
; idx
++) {
268 if (128 <= chars
[idx
]) {
274 buffer
->isASCII
= allASCII
;
276 buffer
->numChars
= len
;
277 buffer
->shouldFreeChars
= !buffer
->chars
.ascii
&& (len
<= MAX_LOCAL_CHARS
) ? false : true;
278 buffer
->chars
.ascii
= (buffer
->chars
.ascii
? buffer
->chars
.ascii
: (len
<= MAX_LOCAL_CHARS
) ? (uint8_t *)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(uint8_t), 0));
279 memmove(buffer
->chars
.ascii
, chars
, len
* sizeof(uint8_t));
282 static CFStringEncodingToUnicodeProc __CFFromUTF8
= NULL
;
285 const CFStringEncodingConverter
*converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
286 __CFFromUTF8
= (CFStringEncodingToUnicodeProc
)converter
->toUnicode
;
289 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (len
<= MAX_LOCAL_UNICHARS
) ? false : true;
290 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (len
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(UniChar
), 0));
291 buffer
->numChars
= 0;
292 while (chars
< end
) {
294 chars
+= __CFFromUTF8(converterFlags
, chars
, end
- chars
, &(buffer
->chars
.unicode
[buffer
->numChars
]), len
- buffer
->numChars
, &numDone
);
297 if (buffer
->shouldFreeChars
) CFAllocatorDeallocate(buffer
->allocator
, buffer
->chars
.unicode
);
298 buffer
->isASCII
= !alwaysUnicode
;
299 buffer
->shouldFreeChars
= false;
300 buffer
->chars
.ascii
= NULL
;
301 buffer
->numChars
= 0;
304 buffer
->numChars
+= numDone
;
// Every other encoding: requires CFStringEncodingIsValidEncoding() and a converter. Converters
// of class kCFStringEncodingConverterCheapEightBit are handled one byte at a time; the rest go
// through CFStringEncodingBytesToUnicode() into a buffer sized from
// CFStringEncodingCharLengthForBytes(). In both cases all-7-bit input in an ASCII-superset
// encoding is copied as 8-bit data when an 8-bit result is allowed.
310 if (CFStringEncodingIsValidEncoding(encoding
)) {
311 const CFStringEncodingConverter
*converter
= CFStringEncodingGetConverter(encoding
);
312 Boolean isASCIISuperset
= __CFStringEncodingIsSupersetOfASCII(encoding
);
314 if (!converter
) return false;
316 if (converter
->encodingClass
== kCFStringEncodingConverterCheapEightBit
) {
317 allASCII
= !alwaysUnicode
&& isASCIISuperset
;
319 for (idx
= 0; idx
< len
; idx
++) {
320 if (128 <= chars
[idx
]) {
326 buffer
->isASCII
= allASCII
;
328 buffer
->numChars
= len
;
329 buffer
->shouldFreeChars
= !buffer
->chars
.ascii
&& (len
<= MAX_LOCAL_CHARS
) ? false : true;
330 buffer
->chars
.ascii
= (buffer
->chars
.ascii
? buffer
->chars
.ascii
: (len
<= MAX_LOCAL_CHARS
) ? (uint8_t *)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(uint8_t), 0));
331 memmove(buffer
->chars
.ascii
, chars
, len
* sizeof(uint8_t));
333 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (len
<= MAX_LOCAL_UNICHARS
) ? false : true;
334 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (len
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(UniChar
), 0));
335 buffer
->numChars
= len
;
336 if (kCFStringEncodingASCII
== encoding
|| kCFStringEncodingISOLatin1
== encoding
) {
337 for (idx
= 0; idx
< len
; idx
++) buffer
->chars
.unicode
[idx
] = (UniChar
)chars
[idx
];
339 for (idx
= 0; idx
< len
; idx
++)
340 if (chars
[idx
] < 0x80 && isASCIISuperset
)
341 buffer
->chars
.unicode
[idx
] = (UniChar
)chars
[idx
];
342 else if (!((CFStringEncodingCheapEightBitToUnicodeProc
)converter
->toUnicode
)(0, chars
[idx
], buffer
->chars
.unicode
+ idx
))
348 allASCII
= !alwaysUnicode
&& isASCIISuperset
;
350 for (idx
= 0; idx
< len
; idx
++)
351 if (128 <= chars
[idx
]) {
356 buffer
->isASCII
= allASCII
;
358 buffer
->numChars
= len
;
359 buffer
->shouldFreeChars
= !buffer
->chars
.ascii
&& (len
<= MAX_LOCAL_CHARS
) ? false : true;
360 buffer
->chars
.ascii
= (buffer
->chars
.ascii
? buffer
->chars
.ascii
: (len
<= MAX_LOCAL_CHARS
) ? (uint8_t *)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(uint8_t), 0));
361 memmove(buffer
->chars
.ascii
, chars
, len
* sizeof(uint8_t));
363 UInt32 guessedLength
= CFStringEncodingCharLengthForBytes(encoding
, 0, bytes
, len
);
364 static UInt32 lossyFlag
= (UInt32
)-1;
366 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (guessedLength
<= MAX_LOCAL_UNICHARS
) ? false : true;
367 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (guessedLength
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, guessedLength
* sizeof(UniChar
), 0));
// Lossy conversion is allowed only for executables not linked on or after Panther; the
// choice is computed once and cached in the static lossyFlag.
369 if (lossyFlag
== (UInt32
)-1) lossyFlag
= (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther
) ? 0 : kCFStringEncodingAllowLossyConversion
);
371 if (CFStringEncodingBytesToUnicode(encoding
, lossyFlag
|__CFGetASCIICompatibleFlag(), bytes
, len
, NULL
, buffer
->chars
.unicode
, (guessedLength
> MAX_LOCAL_UNICHARS
? guessedLength
: MAX_LOCAL_UNICHARS
), &(buffer
->numChars
))) {
372 if (buffer
->shouldFreeChars
) CFAllocatorDeallocate(buffer
->allocator
, buffer
->chars
.unicode
);
373 buffer
->isASCII
= !alwaysUnicode
;
374 buffer
->shouldFreeChars
= false;
375 buffer
->chars
.ascii
= NULL
;
376 buffer
->numChars
= 0;
389 /* Create a byte stream from a CFString backing. Can convert a string piece at a time
390 into a fixed size buffer. Returns number of characters converted.
391 Characters that cannot be converted to the specified encoding are represented
392 with the char specified by lossByte; if 0, then lossy conversion is not allowed
393 and conversion stops, returning partial results.
394 Pass buffer==NULL if you don't care about the converted string (but just the convertibility,
395 or number of bytes required, indicated by usedBufLen).
396 Does not zero-terminate. If you want to create Pascal or C string, allow one extra byte at start or end.
398 Note: This function is intended to work through CFString functions, so it should work
399 with NSStrings as well as CFStrings.
// __CFStringEncodeByteStream: converts the characters of string in [rangeLoc, rangeLoc + rangeLen)
// to bytes in the given encoding, writing at most max bytes into buffer.
// Returns the number of characters processed. When usedBufLen is non-NULL it receives the
// number of bytes produced.
//   generatingExternalFile - requests a byte order mark (kCFStringEncodingPrependBOM for the
//                            converters, explicit BOM bytes on the UTF-16 path).
//   lossByte               - 0 disables lossy conversion; otherwise it selects the lossy flags
//                            computed for the general path.
// Paths visible here, in order: UTF-8 when the string exposes its UniChars directly;
// NonLossyASCII; Unicode (UTF-16); then the general path, which first tries 8-bit fast paths
// for non-ObjC strings in ASCII-superset encodings and finally converts through
// CFStringEncodingUnicodeToBytes() in chunks of kCFCharConversionBufferLength characters.
// NOTE(review): this listing has lost short lines (several declarations, else/break lines and
// closing braces). Confirm against a pristine copy of the file before changing any logic here.
// NOTE(review): "while (*ptr < 0x80 && rangeLen > 0)" reads *ptr before testing rangeLen; for
// the Pascal-string pointer this may read one byte past the requested range. Confirm.
401 CFIndex
__CFStringEncodeByteStream(CFStringRef string
, CFIndex rangeLoc
, CFIndex rangeLen
, Boolean generatingExternalFile
, CFStringEncoding encoding
, char lossByte
, uint8_t *buffer
, CFIndex max
, CFIndex
*usedBufLen
) {
402 CFIndex totalBytesWritten
= 0; /* Number of written bytes */
403 CFIndex numCharsProcessed
= 0; /* Number of processed chars */
404 const UniChar
*unichars
;
406 if (encoding
== kCFStringEncodingUTF8
&& (unichars
= CFStringGetCharactersPtr(string
))) {
407 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
410 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
411 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
413 numCharsProcessed
= __CFToUTF8((generatingExternalFile
? kCFStringEncodingPrependBOM
: 0), unichars
+ rangeLoc
, rangeLen
, buffer
, (buffer
? max
: 0), &totalBytesWritten
);
// NonLossyASCII: characters below 256 that are not passed through are written as a three
// digit octal escape, everything else as a 'u' escape with four hex digits.
// NOTE(review): the pass-through branch and the store of the leading backslash in tmp[0] are
// among the lost lines. Verify.
415 } else if (encoding
== kCFStringEncodingNonLossyASCII
) {
416 const char *hex
= "0123456789abcdef";
418 CFStringInlineBuffer buf
;
419 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(rangeLoc
, rangeLen
));
420 while (numCharsProcessed
< rangeLen
) {
421 CFIndex reqLength
; /* Required number of chars to encode this UniChar */
424 ch
= CFStringGetCharacterFromInlineBuffer(&buf
, numCharsProcessed
);
425 if ((ch
>= ' ' && ch
<= '~' && ch
!= '\\') || (ch
== '\n' || ch
== '\r' || ch
== '\t')) {
432 } else if (ch
< 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
433 tmp
[1] = '0' + (ch
>> 6);
434 tmp
[2] = '0' + ((ch
>> 3) & 7);
435 tmp
[3] = '0' + (ch
& 7);
437 } else { /* \Unnnn */
438 tmp
[1] = 'u'; // Changed to small+u in order to be aligned with Java
439 tmp
[2] = hex
[(ch
>> 12) & 0x0f];
440 tmp
[3] = hex
[(ch
>> 8) & 0x0f];
441 tmp
[4] = hex
[(ch
>> 4) & 0x0f];
442 tmp
[5] = hex
[ch
& 0x0f];
448 if (totalBytesWritten
+ reqLength
> max
) break; /* Doesn't fit..
450 for (cnt
= 0; cnt
< reqLength
; cnt
++) {
451 buffer
[totalBytesWritten
+ cnt
] = tmp
[cnt
];
454 totalBytesWritten
+= reqLength
;
457 } else if (encoding
== kCFStringEncodingUnicode
) {
458 CFIndex extraForBOM
= generatingExternalFile
? sizeof(UniChar
) : 0;
459 numCharsProcessed
= rangeLen
;
460 if (buffer
&& (numCharsProcessed
* (CFIndex
)sizeof(UniChar
) + extraForBOM
> max
)) {
461 numCharsProcessed
= (max
> extraForBOM
) ? ((max
- extraForBOM
) / sizeof(UniChar
)) : 0;
463 totalBytesWritten
= (numCharsProcessed
* sizeof(UniChar
)) + extraForBOM
;
465 if (generatingExternalFile
) { /* Generate BOM */
466 #if defined(__BIG_ENDIAN__)
467 *buffer
++ = 0xfe; *buffer
++ = 0xff;
469 *buffer
++ = 0xff; *buffer
++ = 0xfe;
472 CFStringGetCharacters(string
, CFRangeMake(rangeLoc
, numCharsProcessed
), (UniChar
*)buffer
);
// General path. For non-ObjC strings and ASCII-superset encodings, an 8-bit backing store
// (C string, then Pascal string) is consulted first: if the store is already in the target
// encoding the bytes are copied directly, otherwise the leading run of bytes below 0x80 is
// copied and the remainder falls through to the converter.
477 const unsigned char *cString
= NULL
;
479 if (!CF_IS_OBJC(CFStringGetTypeID(), string
) && __CFStringEncodingIsSupersetOfASCII(encoding
)) { // Checking for NSString to avoid infinite recursion
480 const unsigned char *ptr
;
481 if ((cString
= CFStringGetCStringPtr(string
, __CFStringGetEightBitStringEncoding()))) {
482 ptr
= (cString
+= rangeLoc
);
483 if (__CFStringGetEightBitStringEncoding() == encoding
) {
484 numCharsProcessed
= (rangeLen
< max
|| buffer
== NULL
? rangeLen
: max
);
485 if (buffer
) memmove(buffer
, cString
, numCharsProcessed
);
486 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
487 return numCharsProcessed
;
489 while (*ptr
< 0x80 && rangeLen
> 0) {
493 numCharsProcessed
= ptr
- cString
;
495 numCharsProcessed
= (numCharsProcessed
< max
? numCharsProcessed
: max
);
496 memmove(buffer
, cString
, numCharsProcessed
);
497 buffer
+= numCharsProcessed
;
498 max
-= numCharsProcessed
;
500 if (!rangeLen
|| (buffer
&& (max
== 0))) {
501 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
502 return numCharsProcessed
;
504 rangeLoc
+= numCharsProcessed
;
505 totalBytesWritten
+= numCharsProcessed
;
507 if (!cString
&& (cString
= CFStringGetPascalStringPtr(string
, __CFStringGetEightBitStringEncoding()))) {
508 ptr
= (cString
+= (rangeLoc
+ 1));
509 if (__CFStringGetEightBitStringEncoding() == encoding
) {
510 numCharsProcessed
= (rangeLen
< max
|| buffer
== NULL
? rangeLen
: max
);
511 if (buffer
) memmove(buffer
, cString
, numCharsProcessed
);
512 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
513 return numCharsProcessed
;
515 while (*ptr
< 0x80 && rangeLen
> 0) {
519 numCharsProcessed
= ptr
- cString
;
521 numCharsProcessed
= (numCharsProcessed
< max
? numCharsProcessed
: max
);
522 memmove(buffer
, cString
, numCharsProcessed
);
523 buffer
+= numCharsProcessed
;
524 max
-= numCharsProcessed
;
526 if (!rangeLen
|| (buffer
&& (max
== 0))) {
527 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
528 return numCharsProcessed
;
530 rangeLoc
+= numCharsProcessed
;
531 totalBytesWritten
+= numCharsProcessed
;
535 if (!buffer
) max
= 0;
537 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
538 flags
= (lossByte
? ((unsigned char)lossByte
== 0xFF && encoding
== kCFStringEncodingASCII
? kCFStringEncodingAllowLossyConversion
: CFStringEncodingLossyByteToMask(lossByte
)) : 0) | (generatingExternalFile
? kCFStringEncodingPrependBOM
: 0) | __CFGetASCIICompatibleFlag();
540 if (!cString
&& (cString
= (const char*)CFStringGetCharactersPtr(string
))) { // Must be Unicode string
541 if (CFStringEncodingIsValidEncoding(encoding
)) { // Converter available in CF
542 CFStringEncodingUnicodeToBytes(encoding
, flags
, (const UniChar
*)cString
+ rangeLoc
, rangeLen
, &numCharsProcessed
, buffer
, max
, &totalBytesWritten
);
// Chunked conversion: characters are fetched into charBuf at most
// kCFCharConversionBufferLength at a time. lastUsedLen / lastNumChars remember the previous
// chunk so that a failure at a chunk boundary can be retried after backing up to the start
// of a composed character sequence.
547 UniChar charBuf
[kCFCharConversionBufferLength
];
548 UInt32 currentLength
;
550 uint32_t lastUsedLen
= 0, lastNumChars
= 0;
552 Boolean isCFBuiltin
= CFStringEncodingIsValidEncoding(encoding
);
553 #define MAX_DECOMP_LEN (6)
555 while (rangeLen
> 0) {
556 currentLength
= (rangeLen
> kCFCharConversionBufferLength
? kCFCharConversionBufferLength
: rangeLen
);
557 CFStringGetCharacters(string
, CFRangeMake(rangeLoc
, currentLength
), charBuf
);
559 // could be in the middle of surrogate pair; back up.
560 if ((rangeLen
> kCFCharConversionBufferLength
) && CFUniCharIsSurrogateHighCharacter(charBuf
[kCFCharConversionBufferLength
- 1])) --currentLength
;
562 if (isCFBuiltin
) { // Converter available in CF
563 if ((result
= CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, currentLength
, &numChars
, buffer
, max
, &usedLen
)) != kCFStringEncodingConversionSuccess
) {
564 if (kCFStringEncodingInvalidInputStream
== result
) {
565 CFRange composedRange
;
567 if ((rangeLen
> kCFCharConversionBufferLength
) && ((currentLength
- numChars
) < MAX_DECOMP_LEN
)) {
568 composedRange
= CFStringGetRangeOfComposedCharactersAtIndex(string
, rangeLoc
+ currentLength
);
570 if ((composedRange
.length
<= MAX_DECOMP_LEN
) && (composedRange
.location
< (rangeLoc
+ numChars
))) {
571 result
= CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, composedRange
.location
- rangeLoc
, &numChars
, buffer
, max
, &usedLen
);
576 if ((kCFStringEncodingConversionSuccess
!= result
) && (lastNumChars
> 0) && (numChars
< MAX_DECOMP_LEN
)) {
577 composedRange
= CFStringGetRangeOfComposedCharactersAtIndex(string
, rangeLoc
);
579 if ((composedRange
.length
<= MAX_DECOMP_LEN
) && (composedRange
.location
< rangeLoc
)) {
580 // Try if the composed range can be converted
581 CFStringGetCharacters(string
, composedRange
, charBuf
);
583 if (CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, composedRange
.length
, &numChars
, NULL
, 0, &usedLen
) == kCFStringEncodingConversionSuccess
) { // OK let's try the last run
584 CFIndex lastRangeLoc
= rangeLoc
- lastNumChars
;
586 currentLength
= composedRange
.location
- lastRangeLoc
;
587 CFStringGetCharacters(string
, CFRangeMake(lastRangeLoc
, currentLength
), charBuf
);
589 if ((result
= CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, currentLength
, &numChars
, (max
? buffer
- lastUsedLen
: NULL
), (max
? max
+ lastUsedLen
: 0), &usedLen
)) == kCFStringEncodingConversionSuccess
) { // OK let's try the last run
590 // Looks good. back up
591 totalBytesWritten
-= lastUsedLen
;
592 numCharsProcessed
-= lastNumChars
;
594 rangeLoc
= lastRangeLoc
;
595 rangeLen
+= lastNumChars
;
598 buffer
-= lastUsedLen
;
607 if (kCFStringEncodingConversionSuccess
!= result
) { // really failed
608 totalBytesWritten
+= usedLen
;
609 numCharsProcessed
+= numChars
;
617 totalBytesWritten
+= usedLen
;
618 numCharsProcessed
+= numChars
;
620 rangeLoc
+= numChars
;
621 rangeLen
-= numChars
;
627 lastUsedLen
= usedLen
; lastNumChars
= numChars
;
628 flags
&= ~kCFStringEncodingPrependBOM
;
632 if (usedBufLen
) *usedBufLen
= totalBytesWritten
;
633 return numCharsProcessed
;
636 #define MAX_STACK_BUFFER_LEN (255)
637 CF_EXPORT Boolean
_CFStringGetFileSystemRepresentation(CFStringRef string
, uint8_t *buffer
, CFIndex maxBufLen
) {
638 #if defined(__MACH__)
639 const UTF16Char
*characters
= CFStringGetCharactersPtr(string
);
642 if (NULL
== characters
) {
643 CFIndex length
= CFStringGetLength(string
);
645 if (length
> MAX_STACK_BUFFER_LEN
) {
646 UTF16Char charactersBuffer
[MAX_STACK_BUFFER_LEN
];
647 CFRange range
= CFRangeMake(0, MAX_STACK_BUFFER_LEN
);
648 uint32_t localUsedBufLen
;
653 CFStringGetCharacters(string
, range
, charactersBuffer
);
654 if (CFUniCharIsSurrogateHighCharacter(charactersBuffer
[range
.length
- 1])) --range
.length
; // Backup for a high surrogate
656 if (!CFUniCharDecompose(charactersBuffer
, range
.length
, NULL
, (void *)buffer
, maxBufLen
- usedBufLen
, &localUsedBufLen
, true, kCFUniCharUTF8Format
, true)) return false;
657 buffer
+= localUsedBufLen
;
658 usedBufLen
+= localUsedBufLen
;
660 length
-= range
.length
;
661 range
.location
+= range
.length
;
662 range
.length
= (length
< MAX_STACK_BUFFER_LEN
? length
: MAX_STACK_BUFFER_LEN
);
665 UTF16Char charactersBuffer
[MAX_STACK_BUFFER_LEN
]; // C99 Variable array
667 CFStringGetCharacters(string
, CFRangeMake(0, length
), charactersBuffer
);
668 if (!CFUniCharDecompose(charactersBuffer
, length
, NULL
, (void *)buffer
, maxBufLen
, &usedBufLen
, true, kCFUniCharUTF8Format
, true)) return false;
669 buffer
+= usedBufLen
;
672 if (!CFUniCharDecompose(characters
, CFStringGetLength(string
), NULL
, (void *)buffer
, maxBufLen
, &usedBufLen
, true, kCFUniCharUTF8Format
, true)) return false;
673 buffer
+= usedBufLen
;
676 if (usedBufLen
< (uint32_t)maxBufLen
) { // Since the filename has its own limit, this is ok for now
683 return CFStringGetCString(string
, buffer
, maxBufLen
, CFStringFileSystemEncoding());