2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
23 /* CFStringEncodings.c
24 Copyright 1999-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFString.h>
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFUtilitiesPriv.h"
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUniChar.h"
35 #include "CFUnicodeDecomposition.h"
37 static UInt32 __CFWantsToUseASCIICompatibleConversion
= (UInt32
)-1;
38 CF_INLINE UInt32
__CFGetASCIICompatibleFlag(void) {
39 if (__CFWantsToUseASCIICompatibleConversion
== (UInt32
)-1) {
40 __CFWantsToUseASCIICompatibleConversion
= false;
42 return (__CFWantsToUseASCIICompatibleConversion
? kCFStringEncodingASCIICompatibleConversion
: 0);
45 void _CFStringEncodingSetForceASCIICompatibility(Boolean flag
) {
46 __CFWantsToUseASCIICompatibleConversion
= (flag
? (UInt32
)true : (UInt32
)false);
49 Boolean (*__CFCharToUniCharFunc
)(UInt32 flags
, uint8_t ch
, UniChar
*unicodeChar
) = NULL
;
51 // To avoid early initialization issues, we just initialize this here
52 // This should not be const as it is changed
53 UniChar __CFCharToUniCharTable
[256] = {
54 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
55 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
56 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
57 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
58 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
59 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
60 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
61 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
62 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
63 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
64 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
65 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
66 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
67 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
68 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
69 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
72 void __CFSetCharToUniCharFunc(Boolean (*func
)(UInt32 flags
, UInt8 ch
, UniChar
*unicodeChar
)) {
73 if (__CFCharToUniCharFunc
!= func
) {
75 __CFCharToUniCharFunc
= func
;
77 for (ch
= 128; ch
< 256; ch
++) {
79 __CFCharToUniCharTable
[ch
] = (__CFCharToUniCharFunc(0, ch
, &uch
) ? uch
: 0xFFFD);
81 } else { // If we have no __CFCharToUniCharFunc, assume 128..255 return the value as-is
82 for (ch
= 128; ch
< 256; ch
++) __CFCharToUniCharTable
[ch
] = ch
;
87 __private_extern__
void __CFStrConvertBytesToUnicode(const uint8_t *bytes
, UniChar
*buffer
, CFIndex numChars
) {
89 for (idx
= 0; idx
< numChars
; idx
++) buffer
[idx
] = __CFCharToUniCharTable
[bytes
[idx
]];
/* The minimum length the output buffers should be in the above functions
*/
#define kCFCharConversionBufferLength 512

/* Capacity of a CFVarWidthCharBuffer's inline storage, counted in 8-bit
 * characters and in UniChars respectively. Both macros expand to an
 * expression that refers to a variable named `buffer`, so they are only
 * usable where such a pointer is in scope.
 */
#define MAX_LOCAL_CHARS		(sizeof(buffer->localBuffer) / sizeof(uint8_t))
#define MAX_LOCAL_UNICHARS	(sizeof(buffer->localBuffer) / sizeof(UniChar))
/* Convert a byte stream to ASCII (7-bit!) or Unicode, with a CFVarWidthCharBuffer struct on the stack. A false return indicates that an error occurred during the conversion. The caller needs to free the returned buffer in either ascii or unicode (indicated by isASCII), if shouldFreeChars is true.
9/18/98 __CFStringDecodeByteStream now avoids allocating a buffer if buffer->chars is not NULL
Added useClientsMemoryPtr; if not-NULL, and the provided memory can be used as is, this is set to true
__CFStringDecodeByteStream2() is kept around for any internal clients who might be using it; it should be deprecated
!!! converterFlags is only used for the UTF8 converter at this point
*/
107 Boolean
__CFStringDecodeByteStream2(const uint8_t *bytes
, UInt32 len
, CFStringEncoding encoding
, Boolean alwaysUnicode
, CFVarWidthCharBuffer
*buffer
, Boolean
*useClientsMemoryPtr
) {
108 return __CFStringDecodeByteStream3(bytes
, len
, encoding
, alwaysUnicode
, buffer
, useClientsMemoryPtr
, 0);
/* Parser states for decoding kCFStringEncodingNonLossyASCII input.
 * The hex and octal states are consecutive integers so the decoder can
 * advance through an escape sequence by incrementing the mode until it
 * reaches the corresponding "final" value.
 */
enum {
    __NSNonLossyErrorMode = -1,                                     /* malformed input */
    __NSNonLossyASCIIMode = 0,                                      /* outside any escape */
    __NSNonLossyBackslashMode = 1,                                  /* just consumed a backslash */
    __NSNonLossyHexInitialMode = __NSNonLossyBackslashMode + 1,     /* first hex digit expected */
    __NSNonLossyHexFinalMode = __NSNonLossyHexInitialMode + 4,      /* four hex digits consumed */
    __NSNonLossyOctalInitialMode = __NSNonLossyHexFinalMode + 1,    /* first octal digit consumed */
    __NSNonLossyOctalFinalMode = __NSNonLossyHexFinalMode + 3       /* three octal digits consumed */
};
/* __CFStringDecodeByteStream3
 * Decodes `len` bytes at `bytes`, interpreted in `encoding`, into `buffer`.
 *
 * What the visible code does:
 *  - Resets *useClientsMemoryPtr (when given) to false, sets isASCII to
 *    !alwaysUnicode, clears shouldFreeChars and numChars, and returns true
 *    immediately for len == 0.
 *  - Falls back to __CFGetDefaultAllocator() when buffer->allocator is NULL.
 *  - UTF-16 / UTF-32 families: derive a byte-swap decision from the BOM
 *    and/or the explicit BE/LE encoding, then either hand back the caller's
 *    memory (UTF-16 only, when useClientsMemoryPtr is usable and no swap is
 *    needed) or copy into localBuffer / allocator memory, narrowing to
 *    8-bit storage when every unit passes the ASCII mask.
 *  - kCFStringEncodingNonLossyASCII: runs the __NSNonLossy* state machine.
 *  - kCFStringEncodingUTF8: skips a leading EF BB BF, takes an all-ASCII
 *    fast path, otherwise loops over the UTF-8 converter's toUnicode proc.
 *  - Other valid encodings: uses the registered converter (per-byte for the
 *    cheap eight-bit class, CFStringEncodingBytesToUnicode otherwise).
 *
 * NOTE(review): this listing carries stray leading numbers on each original
 * line and appears to have lost lines (braces, else/switch/break, some
 * declarations and returns); confirm against a pristine copy before
 * relying on exact control flow.
 */
121 Boolean
__CFStringDecodeByteStream3(const uint8_t *bytes
, UInt32 len
, CFStringEncoding encoding
, Boolean alwaysUnicode
, CFVarWidthCharBuffer
*buffer
, Boolean
*useClientsMemoryPtr
, UInt32 converterFlags
) {
// Reset the output state before doing any work.
123 if (useClientsMemoryPtr
) *useClientsMemoryPtr
= false;
125 buffer
->isASCII
= !alwaysUnicode
;
126 buffer
->shouldFreeChars
= false;
127 buffer
->numChars
= 0;
129 if (0 == len
) return true;
131 buffer
->allocator
= (buffer
->allocator
? buffer
->allocator
: __CFGetDefaultAllocator());
// UTF-16 family
133 if ((encoding
== kCFStringEncodingUTF16
) || (encoding
== kCFStringEncodingUTF16BE
) || (encoding
== kCFStringEncodingUTF16LE
)) { // UTF-16
134 const UTF16Char
*src
= (const UTF16Char
*)bytes
;
135 const UTF16Char
*limit
= (const UTF16Char
*)(bytes
+ len
);
138 if (kCFStringEncodingUTF16
== encoding
) {
139 UTF16Char bom
= ((*src
== 0xFFFE) || (*src
== 0xFEFF) ? *(src
++) : 0);
141 #if defined(__BIG_ENDIAN__)
142 if (bom
== 0xFFFE) swap
= true;
144 if (bom
!= 0xFEFF) swap
= true;
// A consumed BOM rules out handing the caller's memory back as-is.
146 if (bom
) useClientsMemoryPtr
= NULL
;
148 #if defined(__BIG_ENDIAN__)
149 if (kCFStringEncodingUTF16LE
== encoding
) swap
= true;
151 if (kCFStringEncodingUTF16BE
== encoding
) swap
= true;
155 buffer
->numChars
= limit
- src
;
157 if (useClientsMemoryPtr
&& !swap
) { // If the caller is ready to deal with no-copy situation, and the situation is possible, indicate it...
158 *useClientsMemoryPtr
= true;
159 buffer
->chars
.unicode
= (UniChar
*)src
;
160 buffer
->isASCII
= false;
162 if (buffer
->isASCII
) { // Let's see if we can reduce the Unicode down to ASCII...
163 const UTF16Char
*characters
= src
;
// The mask selects every bit above the low 7; it is byte-reversed when the data still needs swapping.
164 UTF16Char mask
= (swap
? 0x80FF : 0xFF80);
166 while (characters
< limit
) {
167 if (*(characters
++) & mask
) {
168 buffer
->isASCII
= false;
174 if (buffer
->isASCII
) {
176 if (NULL
== buffer
->chars
.ascii
) { // we never reallocate when buffer is supplied
177 if (buffer
->numChars
> MAX_LOCAL_CHARS
) {
178 buffer
->chars
.ascii
= CFAllocatorAllocate(buffer
->allocator
, (buffer
->numChars
* sizeof(uint8_t)), 0);
179 buffer
->shouldFreeChars
= true;
181 buffer
->chars
.ascii
= (uint8_t *)buffer
->localBuffer
;
184 dst
= buffer
->chars
.ascii
;
187 while (src
< limit
) *(dst
++) = (*(src
++) >> 8);
189 while (src
< limit
) *(dst
++) = *(src
++);
194 if (NULL
== buffer
->chars
.unicode
) { // we never reallocate when buffer is supplied
195 if (buffer
->numChars
> MAX_LOCAL_UNICHARS
) {
196 buffer
->chars
.unicode
= CFAllocatorAllocate(buffer
->allocator
, (buffer
->numChars
* sizeof(UTF16Char
)), 0);
197 buffer
->shouldFreeChars
= true;
199 buffer
->chars
.unicode
= (UTF16Char
*)buffer
->localBuffer
;
202 dst
= buffer
->chars
.unicode
;
205 while (src
< limit
) *(dst
++) = CFSwapInt16(*(src
++));
207 memmove(dst
, src
, buffer
->numChars
* sizeof(UTF16Char
));
// UTF-32 family
211 } else if ((encoding
== kCFStringEncodingUTF32
) || (encoding
== kCFStringEncodingUTF32BE
) || (encoding
== kCFStringEncodingUTF32LE
)) {
212 const UTF32Char
*src
= (const UTF32Char
*)bytes
;
213 const UTF32Char
*limit
= (const UTF32Char
*)(bytes
+ len
);
216 if (kCFStringEncodingUTF32
== encoding
) {
217 UTF32Char bom
= ((*src
== 0xFFFE0000) || (*src
== 0x0000FEFF) ? *(src
++) : 0);
219 #if defined(__BIG_ENDIAN__)
220 if (bom
== 0xFFFE0000) swap
= true;
222 if (bom
!= 0x0000FEFF) swap
= true;
225 #if defined(__BIG_ENDIAN__)
226 if (kCFStringEncodingUTF32LE
== encoding
) swap
= true;
228 if (kCFStringEncodingUTF32BE
== encoding
) swap
= true;
232 buffer
->numChars
= limit
- src
;
235 // Let's see if we have non-ASCII or non-BMP
236 const UTF32Char
*characters
= src
;
237 UTF32Char asciiMask
= (swap
? 0x80FFFFFF : 0xFFFFFF80);
238 UTF32Char bmpMask
= (swap
? 0x0000FFFF : 0xFFFF0000);
240 while (characters
< limit
) {
241 if (*characters
& asciiMask
) {
242 buffer
->isASCII
= false;
// A unit with bits set under bmpMask bumps numChars by one more.
243 if (*characters
& bmpMask
) ++(buffer
->numChars
);
249 if (buffer
->isASCII
) {
251 if (NULL
== buffer
->chars
.ascii
) { // we never reallocate when buffer is supplied
252 if (buffer
->numChars
> MAX_LOCAL_CHARS
) {
253 buffer
->chars
.ascii
= CFAllocatorAllocate(buffer
->allocator
, (buffer
->numChars
* sizeof(uint8_t)), 0);
254 buffer
->shouldFreeChars
= true;
256 buffer
->chars
.ascii
= (uint8_t *)buffer
->localBuffer
;
259 dst
= buffer
->chars
.ascii
;
262 while (src
< limit
) *(dst
++) = (*(src
++) >> 24);
264 while (src
< limit
) *(dst
++) = *(src
++);
267 if (NULL
== buffer
->chars
.unicode
) { // we never reallocate when buffer is supplied
268 if (buffer
->numChars
> MAX_LOCAL_UNICHARS
) {
269 buffer
->chars
.unicode
= CFAllocatorAllocate(buffer
->allocator
, (buffer
->numChars
* sizeof(UTF16Char
)), 0);
270 buffer
->shouldFreeChars
= true;
272 buffer
->chars
.unicode
= (UTF16Char
*)buffer
->localBuffer
;
275 CFUniCharFromUTF32(src
, limit
- src
, buffer
->chars
.unicode
, false,
276 #if defined(__BIG_ENDIAN__)
// Byte-oriented encodings from here on.
285 const uint8_t *chars
= (const uint8_t *)bytes
;
286 const uint8_t *end
= chars
+ len
;
// Non-lossy ASCII: backslash escapes decoded by the __NSNonLossy* state machine.
289 case kCFStringEncodingNonLossyASCII
: {
290 UTF16Char currentValue
= 0;
292 int8_t mode
= __NSNonLossyASCIIMode
;
294 buffer
->isASCII
= false;
295 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (len
<= MAX_LOCAL_UNICHARS
) ? false : true;
296 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (len
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(UniChar
), 0));
297 buffer
->numChars
= 0;
299 while (chars
< end
) {
300 character
= (*chars
++);
303 case __NSNonLossyASCIIMode
:
304 if (character
== '\\') {
305 mode
= __NSNonLossyBackslashMode
;
306 } else if (character
< 0x80) {
307 currentValue
= character
;
309 mode
= __NSNonLossyErrorMode
;
313 case __NSNonLossyBackslashMode
:
314 if ((character
== 'U') || (character
== 'u')) {
315 mode
= __NSNonLossyHexInitialMode
;
317 } else if ((character
>= '0') && (character
<= '9')) {
318 mode
= __NSNonLossyOctalInitialMode
;
319 currentValue
= character
- '0';
320 } else if (character
== '\\') {
321 mode
= __NSNonLossyASCIIMode
;
322 currentValue
= character
;
324 mode
= __NSNonLossyErrorMode
;
329 if (mode
< __NSNonLossyHexFinalMode
) {
330 if ((character
>= '0') && (character
<= '9')) {
331 currentValue
= (currentValue
<< 4) | (character
- '0');
332 if (++mode
== __NSNonLossyHexFinalMode
) mode
= __NSNonLossyASCIIMode
;
// Fold lowercase letters to uppercase before the A..F range test.
334 if (character
>= 'a') character
-= ('a' - 'A');
335 if ((character
>= 'A') && (character
<= 'F')) {
336 currentValue
= (currentValue
<< 4) | ((character
- 'A') + 10);
337 if (++mode
== __NSNonLossyHexFinalMode
) mode
= __NSNonLossyASCIIMode
;
339 mode
= __NSNonLossyErrorMode
;
343 if ((character
>= '0') && (character
<= '9')) {
344 currentValue
= (currentValue
<< 3) | (character
- '0');
345 if (++mode
== __NSNonLossyOctalFinalMode
) mode
= __NSNonLossyASCIIMode
;
347 mode
= __NSNonLossyErrorMode
;
// Back in ASCII mode means one complete character is ready to emit.
353 if (mode
== __NSNonLossyASCIIMode
) {
354 buffer
->chars
.unicode
[buffer
->numChars
++] = currentValue
;
355 } else if (mode
== __NSNonLossyErrorMode
) {
359 return (mode
== __NSNonLossyASCIIMode
);
// UTF-8
362 case kCFStringEncodingUTF8
:
363 if ((len
>= 3) && (chars
[0] == 0xef) && (chars
[1] == 0xbb) && (chars
[2] == 0xbf)) { // If UTF8 BOM, skip
366 if (0 == len
) return true;
368 if (buffer
->isASCII
) {
369 for (idx
= 0; idx
< len
; idx
++) {
370 if (128 <= chars
[idx
]) {
371 buffer
->isASCII
= false;
376 if (buffer
->isASCII
) {
377 buffer
->numChars
= len
;
378 buffer
->shouldFreeChars
= !buffer
->chars
.ascii
&& (len
<= MAX_LOCAL_CHARS
) ? false : true;
379 buffer
->chars
.ascii
= (buffer
->chars
.ascii
? buffer
->chars
.ascii
: (len
<= MAX_LOCAL_CHARS
) ? (uint8_t *)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(uint8_t), 0));
380 memmove(buffer
->chars
.ascii
, chars
, len
* sizeof(uint8_t));
// Function-local cache of the UTF-8 converter's toUnicode proc.
383 static CFStringEncodingToUnicodeProc __CFFromUTF8
= NULL
;
386 const CFStringEncodingConverter
*converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
387 __CFFromUTF8
= (CFStringEncodingToUnicodeProc
)converter
->toUnicode
;
390 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (len
<= MAX_LOCAL_UNICHARS
) ? false : true;
391 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (len
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(UniChar
), 0));
392 buffer
->numChars
= 0;
393 while (chars
< end
) {
395 chars
+= __CFFromUTF8(converterFlags
, chars
, end
- chars
, &(buffer
->chars
.unicode
[buffer
->numChars
]), len
- buffer
->numChars
, &numDone
);
// Cleanup: release any allocated storage and reset the buffer fields.
398 if (buffer
->shouldFreeChars
) CFAllocatorDeallocate(buffer
->allocator
, buffer
->chars
.unicode
);
399 buffer
->isASCII
= !alwaysUnicode
;
400 buffer
->shouldFreeChars
= false;
401 buffer
->chars
.ascii
= NULL
;
402 buffer
->numChars
= 0;
405 buffer
->numChars
+= numDone
;
// Any other encoding known to the converter registry.
411 if (CFStringEncodingIsValidEncoding(encoding
)) {
412 const CFStringEncodingConverter
*converter
= CFStringEncodingGetConverter(encoding
);
413 Boolean isASCIISuperset
= __CFStringEncodingIsSupersetOfASCII(encoding
);
415 if (!converter
) return false;
417 if (!isASCIISuperset
) buffer
->isASCII
= false;
419 if (buffer
->isASCII
) {
420 for (idx
= 0; idx
< len
; idx
++) {
421 if (128 <= chars
[idx
]) {
422 buffer
->isASCII
= false;
428 if (converter
->encodingClass
== kCFStringEncodingConverterCheapEightBit
) {
429 if (buffer
->isASCII
) {
430 buffer
->numChars
= len
;
431 buffer
->shouldFreeChars
= !buffer
->chars
.ascii
&& (len
<= MAX_LOCAL_CHARS
) ? false : true;
432 buffer
->chars
.ascii
= (buffer
->chars
.ascii
? buffer
->chars
.ascii
: (len
<= MAX_LOCAL_CHARS
) ? (uint8_t *)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(uint8_t), 0));
433 memmove(buffer
->chars
.ascii
, chars
, len
* sizeof(uint8_t));
435 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (len
<= MAX_LOCAL_UNICHARS
) ? false : true;
436 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (len
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(UniChar
), 0));
437 buffer
->numChars
= len
;
// ASCII and ISO Latin 1 bytes are widened directly.
438 if (kCFStringEncodingASCII
== encoding
|| kCFStringEncodingISOLatin1
== encoding
) {
439 for (idx
= 0; idx
< len
; idx
++) buffer
->chars
.unicode
[idx
] = (UniChar
)chars
[idx
];
441 for (idx
= 0; idx
< len
; idx
++)
442 if (chars
[idx
] < 0x80 && isASCIISuperset
)
443 buffer
->chars
.unicode
[idx
] = (UniChar
)chars
[idx
];
444 else if (!((CFStringEncodingCheapEightBitToUnicodeProc
)converter
->toUnicode
)(0, chars
[idx
], buffer
->chars
.unicode
+ idx
))
449 if (buffer
->isASCII
) {
450 buffer
->numChars
= len
;
451 buffer
->shouldFreeChars
= !buffer
->chars
.ascii
&& (len
<= MAX_LOCAL_CHARS
) ? false : true;
452 buffer
->chars
.ascii
= (buffer
->chars
.ascii
? buffer
->chars
.ascii
: (len
<= MAX_LOCAL_CHARS
) ? (uint8_t *)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, len
* sizeof(uint8_t), 0));
453 memmove(buffer
->chars
.ascii
, chars
, len
* sizeof(uint8_t));
// General converter path: size the output from the converter's own length estimate.
455 UInt32 guessedLength
= CFStringEncodingCharLengthForBytes(encoding
, 0, bytes
, len
);
456 static UInt32 lossyFlag
= (UInt32
)-1;
458 buffer
->shouldFreeChars
= !buffer
->chars
.unicode
&& (guessedLength
<= MAX_LOCAL_UNICHARS
) ? false : true;
459 buffer
->chars
.unicode
= (buffer
->chars
.unicode
? buffer
->chars
.unicode
: (guessedLength
<= MAX_LOCAL_UNICHARS
) ? (UniChar
*)buffer
->localBuffer
: CFAllocatorAllocate(buffer
->allocator
, guessedLength
* sizeof(UniChar
), 0));
// lossyFlag is computed once: 0 when linked on or after Panther, otherwise lossy conversion is allowed.
461 if (lossyFlag
== (UInt32
)-1) lossyFlag
= (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther
) ? 0 : kCFStringEncodingAllowLossyConversion
);
463 if (CFStringEncodingBytesToUnicode(encoding
, lossyFlag
|__CFGetASCIICompatibleFlag(), bytes
, len
, NULL
, buffer
->chars
.unicode
, (guessedLength
> MAX_LOCAL_UNICHARS
? guessedLength
: MAX_LOCAL_UNICHARS
), &(buffer
->numChars
))) {
464 if (buffer
->shouldFreeChars
) CFAllocatorDeallocate(buffer
->allocator
, buffer
->chars
.unicode
);
465 buffer
->isASCII
= !alwaysUnicode
;
466 buffer
->shouldFreeChars
= false;
467 buffer
->chars
.ascii
= NULL
;
468 buffer
->numChars
= 0;
/* Create a byte stream from a CFString backing. Can convert a string piece at a time
   into a fixed size buffer. Returns number of characters converted.
   Characters that cannot be converted to the specified encoding are represented
   with the char specified by lossByte; if 0, then lossy conversion is not allowed
   and conversion stops, returning partial results.
   Pass buffer==NULL if you don't care about the converted string (but just the convertibility,
   or number of bytes required, indicated by usedBufLen).
   Does not zero-terminate. If you want to create a Pascal or C string, allow one extra byte at start or end.

   Note: This function is intended to work through CFString functions, so it should work
   with NSStrings as well as CFStrings.
*/
/* __CFStringEncodeByteStream
 * Encodes the characters of `string` in [rangeLoc, rangeLoc + rangeLen)
 * into `buffer` (at most `max` bytes) using `encoding`.
 *
 * Returns the number of characters processed (numCharsProcessed); the
 * number of bytes produced is stored through usedBufLen when it is non-NULL.
 *
 * Paths visible in this listing:
 *  - UTF-8 with a direct UniChar pointer: one call to the UTF-8 converter's
 *    toBytes proc, requesting a BOM when generatingExternalFile is set.
 *  - kCFStringEncodingNonLossyASCII: per-character escaping into a small
 *    temporary (octal \nnn below 256, \uXXXX otherwise).
 *  - UTF-16 family: bulk CFStringGetCharacters, optional BOM, optional swap.
 *  - UTF-32 family: per-character conversion with surrogate-pair joining;
 *    lossByte is substituted for unpaired surrogates when non-zero.
 *  - Everything else: 8-bit fast paths for CFStrings with C/Pascal string
 *    storage, then CFStringEncodingUnicodeToBytes, chunked through a
 *    kCFCharConversionBufferLength-sized stack buffer when needed.
 *
 * NOTE(review): this listing carries stray leading numbers on each original
 * line and appears to have lost lines (braces, else/break, declarations);
 * confirm against a pristine copy before relying on exact control flow.
 */
495 CFIndex
__CFStringEncodeByteStream(CFStringRef string
, CFIndex rangeLoc
, CFIndex rangeLen
, Boolean generatingExternalFile
, CFStringEncoding encoding
, char lossByte
, uint8_t *buffer
, CFIndex max
, CFIndex
*usedBufLen
) {
496 CFIndex totalBytesWritten
= 0; /* Number of written bytes */
497 CFIndex numCharsProcessed
= 0; /* Number of processed chars */
498 const UniChar
*unichars
;
// UTF-8 fast path: only taken when the string exposes its UniChar storage directly.
500 if (encoding
== kCFStringEncodingUTF8
&& (unichars
= CFStringGetCharactersPtr(string
))) {
501 static CFStringEncodingToBytesProc __CFToUTF8
= NULL
;
504 const CFStringEncodingConverter
*utf8Converter
= CFStringEncodingGetConverter(kCFStringEncodingUTF8
);
505 __CFToUTF8
= (CFStringEncodingToBytesProc
)utf8Converter
->toBytes
;
507 numCharsProcessed
= __CFToUTF8((generatingExternalFile
? kCFStringEncodingPrependBOM
: 0), unichars
+ rangeLoc
, rangeLen
, buffer
, (buffer
? max
: 0), &totalBytesWritten
);
// Non-lossy ASCII: escape everything outside printable ASCII / newline / return / tab.
509 } else if (encoding
== kCFStringEncodingNonLossyASCII
) {
510 const char *hex
= "0123456789abcdef";
512 CFStringInlineBuffer buf
;
513 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(rangeLoc
, rangeLen
));
514 while (numCharsProcessed
< rangeLen
) {
515 CFIndex reqLength
; /* Required number of chars to encode this UniChar */
518 ch
= CFStringGetCharacterFromInlineBuffer(&buf
, numCharsProcessed
);
519 if ((ch
>= ' ' && ch
<= '~' && ch
!= '\\') || (ch
== '\n' || ch
== '\r' || ch
== '\t')) {
526 } else if (ch
< 256) { /* \nnn; note that this is not NEXTSTEP encoding but a (small) UniChar */
527 tmp
[1] = '0' + (ch
>> 6);
528 tmp
[2] = '0' + ((ch
>> 3) & 7);
529 tmp
[3] = '0' + (ch
& 7);
531 } else { /* \Unnnn */
532 tmp
[1] = 'u'; // Changed to small+u in order to be aligned with Java
533 tmp
[2] = hex
[(ch
>> 12) & 0x0f];
534 tmp
[3] = hex
[(ch
>> 8) & 0x0f];
535 tmp
[4] = hex
[(ch
>> 4) & 0x0f];
536 tmp
[5] = hex
[ch
& 0x0f];
542 if (totalBytesWritten
+ reqLength
> max
) break; /* Doesn't fit..
544 for (cnt
= 0; cnt
< reqLength
; cnt
++) {
545 buffer
[totalBytesWritten
+ cnt
] = tmp
[cnt
];
548 totalBytesWritten
+= reqLength
;
551 } else if ((encoding
== kCFStringEncodingUTF16
) || (encoding
== kCFStringEncodingUTF16BE
) || (encoding
== kCFStringEncodingUTF16LE
)) {
552 CFIndex extraForBOM
= (generatingExternalFile
&& (encoding
== kCFStringEncodingUTF16
) ? sizeof(UniChar
) : 0);
553 numCharsProcessed
= rangeLen
;
554 if (buffer
&& (numCharsProcessed
* (CFIndex
)sizeof(UniChar
) + extraForBOM
> max
)) {
555 numCharsProcessed
= (max
> extraForBOM
) ? ((max
- extraForBOM
) / sizeof(UniChar
)) : 0;
557 totalBytesWritten
= (numCharsProcessed
* sizeof(UniChar
)) + extraForBOM
;
559 if (extraForBOM
) { /* Generate BOM */
560 #if defined(__BIG_ENDIAN__)
561 *buffer
++ = 0xfe; *buffer
++ = 0xff;
563 *buffer
++ = 0xff; *buffer
++ = 0xfe;
566 CFStringGetCharacters(string
, CFRangeMake(rangeLoc
, numCharsProcessed
), (UniChar
*)buffer
);
568 #if defined(__BIG_ENDIAN__)
569 kCFStringEncodingUTF16LE
571 kCFStringEncodingUTF16BE
573 == encoding
) { // Need to swap
574 UTF16Char
*characters
= (UTF16Char
*)buffer
;
575 const UTF16Char
*limit
= characters
+ numCharsProcessed
;
577 while (characters
< limit
) {
578 *characters
= CFSwapInt16(*characters
);
// UTF-32 family
583 } else if ((encoding
== kCFStringEncodingUTF32
) || (encoding
== kCFStringEncodingUTF32BE
) || (encoding
== kCFStringEncodingUTF32LE
)) {
585 CFStringInlineBuffer buf
;
586 UTF32Char
*characters
= (UTF32Char
*)buffer
;
588 #if defined(__BIG_ENDIAN__)
589 bool swap
= (encoding
== kCFStringEncodingUTF32LE
? true : false);
591 bool swap
= (encoding
== kCFStringEncodingUTF32BE
? true : false);
594 if (generatingExternalFile
&& (encoding
== kCFStringEncodingUTF32
)) {
595 totalBytesWritten
+= sizeof(UTF32Char
);
597 if (totalBytesWritten
> max
) { // insufficient buffer
598 totalBytesWritten
= 0;
600 #if defined(__BIG_ENDIAN__)
601 *(characters
++) = 0x0000FEFF;
603 *(characters
++) = 0xFFFE0000;
609 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(rangeLoc
, rangeLen
));
610 while (numCharsProcessed
< rangeLen
) {
611 character
= CFStringGetCharacterFromInlineBuffer(&buf
, numCharsProcessed
);
// Join a high surrogate with a following low surrogate into one UTF-32 value.
613 if (CFUniCharIsSurrogateHighCharacter(character
)) {
614 UTF16Char otherCharacter
;
616 if (((numCharsProcessed
+ 1) < rangeLen
) && CFUniCharIsSurrogateLowCharacter((otherCharacter
= CFStringGetCharacterFromInlineBuffer(&buf
, numCharsProcessed
+ 1)))) {
617 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherCharacter
);
618 } else if (lossByte
) {
619 character
= lossByte
;
623 } else if (CFUniCharIsSurrogateLowCharacter(character
)) {
625 character
= lossByte
;
631 totalBytesWritten
+= sizeof(UTF32Char
);
634 if (totalBytesWritten
> max
) {
635 totalBytesWritten
-= sizeof(UTF32Char
);
638 *(characters
++) = (swap
? CFSwapInt32(character
) : character
);
// A value above 0xFFFF came from a surrogate pair, so it consumed two source characters.
641 numCharsProcessed
+= (character
> 0xFFFF ? 2 : 1);
// Generic path for all remaining encodings.
646 const unsigned char *cString
= NULL
;
647 BOOL isASCIISuperset
= __CFStringEncodingIsSupersetOfASCII(encoding
);
649 if (!CF_IS_OBJC(CFStringGetTypeID(), string
) && isASCIISuperset
) { // Checking for NSString to avoid infinite recursion
650 const unsigned char *ptr
;
651 if ((cString
= CFStringGetCStringPtr(string
, __CFStringGetEightBitStringEncoding()))) {
652 ptr
= (cString
+= rangeLoc
);
653 if (__CFStringGetEightBitStringEncoding() == encoding
) {
654 numCharsProcessed
= (rangeLen
< max
|| buffer
== NULL
? rangeLen
: max
);
655 if (buffer
) memmove(buffer
, cString
, numCharsProcessed
);
656 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
657 return numCharsProcessed
;
659 while (*ptr
< 0x80 && rangeLen
> 0) {
663 numCharsProcessed
= ptr
- cString
;
665 numCharsProcessed
= (numCharsProcessed
< max
? numCharsProcessed
: max
);
666 memmove(buffer
, cString
, numCharsProcessed
);
667 buffer
+= numCharsProcessed
;
668 max
-= numCharsProcessed
;
670 if (!rangeLen
|| (buffer
&& (max
== 0))) {
671 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
672 return numCharsProcessed
;
674 rangeLoc
+= numCharsProcessed
;
675 totalBytesWritten
+= numCharsProcessed
;
// Same fast path for Pascal-string storage; the +1 skips the length byte.
677 if (!cString
&& (cString
= CFStringGetPascalStringPtr(string
, __CFStringGetEightBitStringEncoding()))) {
678 ptr
= (cString
+= (rangeLoc
+ 1));
679 if (__CFStringGetEightBitStringEncoding() == encoding
) {
680 numCharsProcessed
= (rangeLen
< max
|| buffer
== NULL
? rangeLen
: max
);
681 if (buffer
) memmove(buffer
, cString
, numCharsProcessed
);
682 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
683 return numCharsProcessed
;
685 while (*ptr
< 0x80 && rangeLen
> 0) {
689 numCharsProcessed
= ptr
- cString
;
691 numCharsProcessed
= (numCharsProcessed
< max
? numCharsProcessed
: max
);
692 memmove(buffer
, cString
, numCharsProcessed
);
693 buffer
+= numCharsProcessed
;
694 max
-= numCharsProcessed
;
696 if (!rangeLen
|| (buffer
&& (max
== 0))) {
697 if (usedBufLen
) *usedBufLen
= numCharsProcessed
;
698 return numCharsProcessed
;
700 rangeLoc
+= numCharsProcessed
;
701 totalBytesWritten
+= numCharsProcessed
;
705 if (!buffer
) max
= 0;
707 // Special case for Foundation. When lossByte == 0xFF && encoding kCFStringEncodingASCII, we do the default ASCII fallback conversion
708 // Aki 11/24/04 __CFGetASCIICompatibleFlag() is called only for non-ASCII superset encodings. Otherwise, it could lead to a deadlock (see 3890536).
709 flags
= (lossByte
? ((unsigned char)lossByte
== 0xFF && encoding
== kCFStringEncodingASCII
? kCFStringEncodingAllowLossyConversion
: CFStringEncodingLossyByteToMask(lossByte
)) : 0) | (generatingExternalFile
? kCFStringEncodingPrependBOM
: 0) | (isASCIISuperset
? 0 : __CFGetASCIICompatibleFlag());
711 if (!cString
&& (cString
= (const char*)CFStringGetCharactersPtr(string
))) { // Must be Unicode string
712 if (CFStringEncodingIsValidEncoding(encoding
)) { // Converter available in CF
713 CFStringEncodingUnicodeToBytes(encoding
, flags
, (const UniChar
*)cString
+ rangeLoc
, rangeLen
, &numCharsProcessed
, buffer
, max
, &totalBytesWritten
);
// No direct storage available: convert in chunks through a stack buffer.
718 UniChar charBuf
[kCFCharConversionBufferLength
];
719 UInt32 currentLength
;
721 uint32_t lastUsedLen
= 0, lastNumChars
= 0;
723 Boolean isCFBuiltin
= CFStringEncodingIsValidEncoding(encoding
);
724 #define MAX_DECOMP_LEN (6)
726 while (rangeLen
> 0) {
727 currentLength
= (rangeLen
> kCFCharConversionBufferLength
? kCFCharConversionBufferLength
: rangeLen
);
728 CFStringGetCharacters(string
, CFRangeMake(rangeLoc
, currentLength
), charBuf
);
730 // could be in the middle of surrogate pair; back up.
731 if ((rangeLen
> kCFCharConversionBufferLength
) && CFUniCharIsSurrogateHighCharacter(charBuf
[kCFCharConversionBufferLength
- 1])) --currentLength
;
733 if (isCFBuiltin
) { // Converter available in CF
734 if ((result
= CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, currentLength
, &numChars
, buffer
, max
, &usedLen
)) != kCFStringEncodingConversionSuccess
) {
735 if (kCFStringEncodingInvalidInputStream
== result
) {
736 CFRange composedRange
;
// Failure near the end of a chunk: retry up to the start of the composed character at the chunk boundary.
738 if ((rangeLen
> kCFCharConversionBufferLength
) && ((currentLength
- numChars
) < MAX_DECOMP_LEN
)) {
739 composedRange
= CFStringGetRangeOfComposedCharactersAtIndex(string
, rangeLoc
+ currentLength
);
741 if ((composedRange
.length
<= MAX_DECOMP_LEN
) && (composedRange
.location
< (rangeLoc
+ numChars
))) {
742 result
= CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, composedRange
.location
- rangeLoc
, &numChars
, buffer
, max
, &usedLen
);
// Failure near the start of a chunk: the composed character may begin in the previous chunk.
747 if ((kCFStringEncodingConversionSuccess
!= result
) && (lastNumChars
> 0) && (numChars
< MAX_DECOMP_LEN
)) {
748 composedRange
= CFStringGetRangeOfComposedCharactersAtIndex(string
, rangeLoc
);
750 if ((composedRange
.length
<= MAX_DECOMP_LEN
) && (composedRange
.location
< rangeLoc
)) {
751 // Try if the composed range can be converted
752 CFStringGetCharacters(string
, composedRange
, charBuf
);
754 if (CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, composedRange
.length
, &numChars
, NULL
, 0, &usedLen
) == kCFStringEncodingConversionSuccess
) { // OK let's try the last run
755 CFIndex lastRangeLoc
= rangeLoc
- lastNumChars
;
757 currentLength
= composedRange
.location
- lastRangeLoc
;
758 CFStringGetCharacters(string
, CFRangeMake(lastRangeLoc
, currentLength
), charBuf
);
760 if ((result
= CFStringEncodingUnicodeToBytes(encoding
, flags
, charBuf
, currentLength
, &numChars
, (max
? buffer
- lastUsedLen
: NULL
), (max
? max
+ lastUsedLen
: 0), &usedLen
)) == kCFStringEncodingConversionSuccess
) { // OK let's try the last run
761 // Looks good. back up
762 totalBytesWritten
-= lastUsedLen
;
763 numCharsProcessed
-= lastNumChars
;
765 rangeLoc
= lastRangeLoc
;
766 rangeLen
+= lastNumChars
;
769 buffer
-= lastUsedLen
;
778 if (kCFStringEncodingConversionSuccess
!= result
) { // really failed
779 totalBytesWritten
+= usedLen
;
780 numCharsProcessed
+= numChars
;
788 totalBytesWritten
+= usedLen
;
789 numCharsProcessed
+= numChars
;
791 rangeLoc
+= numChars
;
792 rangeLen
-= numChars
;
// Remember this chunk's totals for the back-up logic, and never emit a BOM twice.
798 lastUsedLen
= usedLen
; lastNumChars
= numChars
;
799 flags
&= ~kCFStringEncodingPrependBOM
;
803 if (usedBufLen
) *usedBufLen
= totalBytesWritten
;
804 return numCharsProcessed
;
807 CFStringRef
CFStringCreateWithFileSystemRepresentation(CFAllocatorRef alloc
, const char *buffer
) {
808 return CFStringCreateWithCString(alloc
, buffer
, CFStringFileSystemEncoding());
/* CFStringGetMaximumSizeOfFileSystemRepresentation
 * Computes a size from the string's length (CFStringGetLength) and its
 * fastest encoding (CFStringGetFastestEncoding); ASCII and MacRoman share
 * one case.
 * NOTE(review): the switch statement, the per-case return expressions and
 * the closing braces are not visible in this listing - confirm the actual
 * multipliers against a pristine copy.
 */
811 CFIndex
CFStringGetMaximumSizeOfFileSystemRepresentation(CFStringRef string
) {
812 CFIndex len
= CFStringGetLength(string
);
813 CFStringEncoding enc
= CFStringGetFastestEncoding(string
);
815 case kCFStringEncodingASCII
:
816 case kCFStringEncodingMacRoman
:
/* CFStringGetFileSystemRepresentation
 * Writes the file system representation of `string` into `buffer`
 * (capacity maxBufLen bytes).
 *
 * Under __MACH__ the visible code decomposes the string's UTF-16
 * characters to UTF-8 with CFUniCharDecompose(..., kCFUniCharUTF8Format, ...):
 *  - directly from CFStringGetCharactersPtr() when that is non-NULL;
 *  - otherwise through a MAX_STACK_BUFFER_LEN (255) UTF16Char stack buffer,
 *    in several chunks when the string is longer than that.
 * Any failing CFUniCharDecompose call makes the function return false.
 *
 * The final visible statement instead delegates to CFStringGetCString()
 * with CFStringFileSystemEncoding() - presumably the non-__MACH__ branch;
 * the #else/#endif lines are not visible here, TODO confirm.
 *
 * NOTE(review): this listing carries stray leading numbers on each original
 * line and appears to have lost lines (loop headers, else, braces, the
 * terminator write); confirm against a pristine copy.
 */
823 Boolean
CFStringGetFileSystemRepresentation(CFStringRef string
, char *buffer
, CFIndex maxBufLen
) {
824 #if defined(__MACH__)
825 #define MAX_STACK_BUFFER_LEN (255)
826 const UTF16Char
*characters
= CFStringGetCharactersPtr(string
);
// No direct access to the characters: copy them out first.
829 if (NULL
== characters
) {
830 CFIndex length
= CFStringGetLength(string
);
// Long string: process MAX_STACK_BUFFER_LEN characters at a time.
832 if (length
> MAX_STACK_BUFFER_LEN
) {
833 UTF16Char charactersBuffer
[MAX_STACK_BUFFER_LEN
];
834 CFRange range
= CFRangeMake(0, MAX_STACK_BUFFER_LEN
);
835 uint32_t localUsedBufLen
;
840 CFStringGetCharacters(string
, range
, charactersBuffer
);
841 if (CFUniCharIsSurrogateHighCharacter(charactersBuffer
[range
.length
- 1])) --range
.length
; // Backup for a high surrogate
843 if (!CFUniCharDecompose(charactersBuffer
, range
.length
, NULL
, (void *)buffer
, maxBufLen
- usedBufLen
, &localUsedBufLen
, true, kCFUniCharUTF8Format
, true)) return false;
844 buffer
+= localUsedBufLen
;
845 usedBufLen
+= localUsedBufLen
;
// Advance to the next chunk, clamped to what is left.
847 length
-= range
.length
;
848 range
.location
+= range
.length
;
849 range
.length
= (length
< MAX_STACK_BUFFER_LEN
? length
: MAX_STACK_BUFFER_LEN
);
// Short string: a single pass through the stack buffer.
852 UTF16Char charactersBuffer
[MAX_STACK_BUFFER_LEN
];
854 CFStringGetCharacters(string
, CFRangeMake(0, length
), charactersBuffer
);
855 if (!CFUniCharDecompose(charactersBuffer
, length
, NULL
, (void *)buffer
, maxBufLen
, &usedBufLen
, true, kCFUniCharUTF8Format
, true)) return false;
856 buffer
+= usedBufLen
;
// Direct pointer available: decompose straight from the string's storage.
859 if (!CFUniCharDecompose(characters
, CFStringGetLength(string
), NULL
, (void *)buffer
, maxBufLen
, &usedBufLen
, true, kCFUniCharUTF8Format
, true)) return false;
860 buffer
+= usedBufLen
;
863 if (usedBufLen
< (uint32_t)maxBufLen
) { // Since the filename has its own limit, this is ok for now
870 return CFStringGetCString(string
, buffer
, maxBufLen
, CFStringFileSystemEncoding());
874 Boolean
_CFStringGetFileSystemRepresentation(CFStringRef string
, uint8_t *buffer
, CFIndex maxBufLen
) {
875 return CFStringGetFileSystemRepresentation(string
, buffer
, maxBufLen
);