]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodingConverter.c
CF-476.10.tar.gz
[apple/cf.git] / CFStringEncodingConverter.c
1 /*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFStringEncodingConverter.c
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFArray.h>
30 #include <CoreFoundation/CFDictionary.h>
31 #include "CFUniChar.h"
32 #include "CFPriv.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFStringEncodingConverterExt.h"
35 #include "CFStringEncodingConverterPriv.h"
36 #include <stdlib.h>
37 #if !defined(__WIN32__)
38 #include <pthread.h>
39 #endif
40
41
/* Conversion dispatch macros.
 *
 * A converter whose private _toBytes/_toUnicode slot is non-NULL was built from a
 * non-standard definition: its public toBytes/toUnicode/toCanonicalUnicode slots
 * hold wrapper functions that take the converter itself as first argument.
 * Otherwise the public slot holds the definition's own procedure, which is
 * called without the converter argument.
 *
 * Every macro argument is parenthesized so callers may pass arbitrary
 * expressions. Note that conv and flags are evaluated more than once.
 */
#define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) ((conv)->_toBytes ? (conv)->toBytes((conv),(flags),(chars),(numChars),(bytes),(max),(used)) : ((CFStringEncodingToBytesProc)(conv)->toBytes)((flags),(chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) ((conv)->_toUnicode ? ((flags) & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? (conv)->toCanonicalUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used)) : (conv)->toUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used))) : ((CFStringEncodingToUnicodeProc)(conv)->toUnicode)((flags),(bytes),(numBytes),(chars),(max),(used)))

/* Line feed; used by the default fallback for non-ASCII newline characters. */
#define ASCIINewLine 0x0a
/* UTF-16 surrogate code unit ranges (both bounds inclusive). */
#define kSurrogateHighStart 0xD800
#define kSurrogateHighEnd 0xDBFF
#define kSurrogateLowStart 0xDC00
#define kSurrogateLowEnd 0xDFFF
52
/* Mapping U+00A0..U+00FF (160..255) to lossy ASCII.
 *
 * The table is indexed by (character - 0xA0). Each entry holds up to three
 * replacement bytes, zero-terminated. An entry whose first byte is 0 has no
 * ASCII stand-in: __CFToASCIILatin1Fallback produces no output for it.
 *
 * Replacements preserve letter case: capital THORN maps to "TH" and small ETH
 * maps to "th", matching their counterparts.
 */
static const struct {
    unsigned char chars[4];
} _toLossyASCIITable[] = {
    {{' ', 0, 0, 0}}, // NO-BREAK SPACE
    {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
    {{'c', 0, 0, 0}}, // CENT SIGN
    {{'L', 0, 0, 0}}, // POUND SIGN
    {{'$', 0, 0, 0}}, // CURRENCY SIGN
    {{'Y', 0, 0, 0}}, // YEN SIGN
    {{'|', 0, 0, 0}}, // BROKEN BAR
    {{0, 0, 0, 0}}, // SECTION SIGN
    {{0, 0, 0, 0}}, // DIAERESIS
    {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
    {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
    {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{0, 0, 0, 0}}, // NOT SIGN
    {{'-', 0, 0, 0}}, // SOFT HYPHEN
    {{'(', 'R', ')', 0}}, // REGISTERED SIGN
    {{0, 0, 0, 0}}, // MACRON
    {{0, 0, 0, 0}}, // DEGREE SIGN
    {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
    {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
    {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
    {{0, 0, 0, 0}}, // ACUTE ACCENT
    {{0, 0, 0, 0}}, // MICRO SIGN
    {{0, 0, 0, 0}}, // PILCROW SIGN
    {{0, 0, 0, 0}}, // MIDDLE DOT
    {{0, 0, 0, 0}}, // CEDILLA
    {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
    {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
    {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
    {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
    {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
    {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
    {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
    {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
    {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
    {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
    {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
    {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
    {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
    {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
    {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
    {{'/', 0, 0, 0}}, // DIVISION SIGN
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
};
155
156 CF_INLINE CFIndex __CFToASCIILatin1Fallback(UniChar character, uint8_t *bytes, CFIndex maxByteLen) {
157 const uint8_t *losChars = (const uint8_t*)_toLossyASCIITable + (character - 0xA0) * sizeof(uint8_t[4]);
158 CFIndex numBytes = 0;
159 CFIndex idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
160
161 for (idx = 0;idx < max;idx++) {
162 if (losChars[idx]) {
163 if (maxByteLen) bytes[idx] = losChars[idx];
164 ++numBytes;
165 } else {
166 break;
167 }
168 }
169
170 return numBytes;
171 }
172
173 static CFIndex __CFDefaultToBytesFallbackProc(const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
174 CFIndex processCharLen = 1, filledBytesLen = 1;
175 uint8_t byte = '?';
176
177 if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
178 byte = (uint8_t)(*characters - 0x80);
179 } else if (*characters < 0x100) {
180 *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
181 return 1;
182 } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
183 processCharLen = (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
184 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
185 byte = ' ';
186 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
187 byte = ASCIINewLine;
188 } else if (*characters == 0x2026) { // ellipsis
189 if (0 == maxByteLen) {
190 filledBytesLen = 3;
191 } else if (maxByteLen > 2) {
192 memset(bytes, '.', 3);
193 *usedByteLen = 3;
194 return processCharLen;
195 }
196 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
197 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
198
199 (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
200 if (*decomposed < 0x80) {
201 byte = (uint8_t)(*decomposed);
202 } else {
203 UTF16Char theChar = *decomposed;
204
205 return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
206 }
207 }
208
209 if (maxByteLen) *bytes = byte;
210 *usedByteLen = filledBytesLen;
211 return processCharLen;
212 }
213
214 static CFIndex __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
215 if (maxCharLen) *characters = (UniChar)'?';
216 *usedCharLen = 1;
217 return 1;
218 }
219
/* Invoke the converter's fallback procedures. Arguments are parenthesized so
 * callers may pass arbitrary expressions.
 */
#define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) ((conv)->toBytesFallback((chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) ((conv)->toUnicodeFallback((bytes),(numBytes),(chars),(max),(used)))

#define EXTRA_BASE (0x0F00)
224
225 /* Wrapper funcs for non-standard converters
226 */
227 static CFIndex __CFToBytesCheapEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
228 CFIndex processedCharLen = 0;
229 CFIndex length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
230 uint8_t byte;
231
232 while (processedCharLen < length) {
233 if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], &byte)) break;
234
235 if (maxByteLen) bytes[processedCharLen] = byte;
236 processedCharLen++;
237 }
238
239 *usedByteLen = processedCharLen;
240 return processedCharLen;
241 }
242
243 static CFIndex __CFToUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
244 CFIndex processedByteLen = 0;
245 CFIndex length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
246 UniChar character;
247
248 while (processedByteLen < length) {
249 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
250
251 if (maxCharLen) characters[processedByteLen] = character;
252 processedByteLen++;
253 }
254
255 *usedCharLen = processedByteLen;
256 return processedByteLen;
257 }
258
259 static CFIndex __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
260 CFIndex processedByteLen = 0;
261 CFIndex theUsedCharLen = 0;
262 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
263 CFIndex usedLen;
264 UniChar character;
265 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
266
267 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
268 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
269
270 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
271 CFIndex idx;
272
273 usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
274 *usedCharLen = theUsedCharLen;
275
276 for (idx = 0;idx < usedLen;idx++) {
277 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
278 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
279 theUsedCharLen += 2;
280 if (maxCharLen) {
281 charBuffer[idx] = charBuffer[idx] - 0x10000;
282 *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
283 *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
284 }
285 } else {
286 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
287 ++theUsedCharLen;
288 *(characters++) = charBuffer[idx];
289 }
290 }
291 } else {
292 if (maxCharLen) *(characters++) = character;
293 ++theUsedCharLen;
294 }
295 processedByteLen++;
296 }
297
298 *usedCharLen = theUsedCharLen;
299 return processedByteLen;
300 }
301
302 static CFIndex __CFToBytesStandardEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
303 CFIndex processedCharLen = 0;
304 uint8_t byte;
305 CFIndex usedLen;
306
307 *usedByteLen = 0;
308
309 while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
310 if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters, numChars, &byte))) break;
311
312 if (maxByteLen) bytes[*usedByteLen] = byte;
313 (*usedByteLen)++;
314 characters += usedLen;
315 numChars -= usedLen;
316 processedCharLen += usedLen;
317 }
318
319 return processedCharLen;
320 }
321
322 static CFIndex __CFToUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
323 CFIndex processedByteLen = 0;
324 #if 0 || 0
325 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
326 #else
327 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
328 #endif
329 CFIndex usedLen;
330
331 *usedCharLen = 0;
332
333 while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
334 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
335
336 if (maxCharLen) {
337 CFIndex idx;
338
339 if (*usedCharLen + usedLen > maxCharLen) break;
340
341 for (idx = 0;idx < usedLen;idx++) {
342 characters[*usedCharLen + idx] = charBuffer[idx];
343 }
344 }
345 *usedCharLen += usedLen;
346 processedByteLen++;
347 }
348
349 return processedByteLen;
350 }
351
352 static CFIndex __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
353 CFIndex processedByteLen = 0;
354 #if 0 || 0
355 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
356 #else
357 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
358 #endif
359 UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
360 CFIndex usedLen;
361 CFIndex decompedLen;
362 CFIndex idx, decompIndex;
363 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
364 CFIndex theUsedCharLen = 0;
365
366 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
367 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
368
369 for (idx = 0;idx < usedLen;idx++) {
370 if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
371 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
372 *usedCharLen = theUsedCharLen;
373
374 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
375 if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
376 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
377 theUsedCharLen += 2;
378 if (maxCharLen) {
379 charBuffer[idx] = charBuffer[idx] - 0x10000;
380 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
381 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
382 }
383 } else {
384 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
385 ++theUsedCharLen;
386 *(characters++) = charBuffer[idx];
387 }
388 }
389 } else {
390 if (maxCharLen) *(characters++) = charBuffer[idx];
391 ++theUsedCharLen;
392 }
393 }
394 processedByteLen++;
395 }
396
397 *usedCharLen = theUsedCharLen;
398 return processedByteLen;
399 }
400
401 static CFIndex __CFToBytesCheapMultiByteWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
402 CFIndex processedCharLen = 0;
403 #if 0 || 0
404 uint8_t byteBuffer[20]; // Dynamic stack allocation is GNU specific
405 #else
406 uint8_t byteBuffer[((const _CFEncodingConverter*)converter)->maxLen];
407 #endif
408 CFIndex usedLen;
409
410 *usedByteLen = 0;
411
412 while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
413 if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
414
415 if (maxByteLen) {
416 CFIndex idx;
417
418 if (*usedByteLen + usedLen > maxByteLen) break;
419
420 for (idx = 0;idx <usedLen;idx++) {
421 bytes[*usedByteLen + idx] = byteBuffer[idx];
422 }
423 }
424
425 *usedByteLen += usedLen;
426 processedCharLen++;
427 }
428
429 return processedCharLen;
430 }
431
432 static CFIndex __CFToUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
433 CFIndex processedByteLen = 0;
434 UniChar character;
435 CFIndex usedLen;
436
437 *usedCharLen = 0;
438
439 while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
440 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
441
442 if (maxCharLen) *(characters++) = character;
443 (*usedCharLen)++;
444 processedByteLen += usedLen;
445 bytes += usedLen;
446 numBytes -= usedLen;
447 }
448
449 return processedByteLen;
450 }
451
452 static CFIndex __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
453 CFIndex processedByteLen = 0;
454 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
455 UniChar character;
456 CFIndex usedLen;
457 CFIndex decomposedLen;
458 CFIndex theUsedCharLen = 0;
459 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
460
461 while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
462 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
463
464 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
465 CFIndex idx;
466
467 decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
468 *usedCharLen = theUsedCharLen;
469
470 for (idx = 0;idx < decomposedLen;idx++) {
471 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
472 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
473 theUsedCharLen += 2;
474 if (maxCharLen) {
475 charBuffer[idx] = charBuffer[idx] - 0x10000;
476 *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
477 *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
478 }
479 } else {
480 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
481 ++theUsedCharLen;
482 *(characters++) = charBuffer[idx];
483 }
484 }
485 } else {
486 if (maxCharLen) *(characters++) = character;
487 ++theUsedCharLen;
488 }
489
490 processedByteLen += usedLen;
491 bytes += usedLen;
492 numBytes -= usedLen;
493 }
494 *usedCharLen = theUsedCharLen;
495 return processedByteLen;
496 }
497
498 /* static functions
499 */
500 static _CFConverterEntry __CFConverterEntryASCII = {
501 kCFStringEncodingASCII, NULL,
502 "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL}, NULL, NULL, NULL, NULL,
503 kCFStringEncodingMacRoman // We use string encoding's script range here
504 };
505
506 static _CFConverterEntry __CFConverterEntryISOLatin1 = {
507 kCFStringEncodingISOLatin1, NULL,
508 "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL}, NULL, NULL, NULL, NULL,
509 kCFStringEncodingMacRoman // We use string encoding's script range here
510 };
511
512 static _CFConverterEntry __CFConverterEntryMacRoman = {
513 kCFStringEncodingMacRoman, NULL,
514 "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL}, NULL, NULL, NULL, NULL,
515 kCFStringEncodingMacRoman // We use string encoding's script range here
516 };
517
518 static _CFConverterEntry __CFConverterEntryWinLatin1 = {
519 kCFStringEncodingWindowsLatin1, NULL,
520 "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL}, NULL, NULL, NULL, NULL,
521 kCFStringEncodingMacRoman // We use string encoding's script range here
522 };
523
524 static _CFConverterEntry __CFConverterEntryNextStepLatin = {
525 kCFStringEncodingNextStepLatin, NULL,
526 "Western (NextStep)", {"x-nextstep", NULL, NULL, NULL}, NULL, NULL, NULL, NULL,
527 kCFStringEncodingMacRoman // We use string encoding's script range here
528 };
529
530 static _CFConverterEntry __CFConverterEntryUTF8 = {
531 kCFStringEncodingUTF8, NULL,
532 "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL, NULL}, NULL, NULL, NULL, NULL,
533 kCFStringEncodingUnicode // We use string encoding's script range here
534 };
535
536 CF_INLINE _CFConverterEntry *__CFStringEncodingConverterGetEntry(uint32_t encoding) {
537 switch (encoding) {
538 case kCFStringEncodingInvalidId:
539 case kCFStringEncodingASCII:
540 return &__CFConverterEntryASCII;
541
542 case kCFStringEncodingISOLatin1:
543 return &__CFConverterEntryISOLatin1;
544
545 case kCFStringEncodingMacRoman:
546 return &__CFConverterEntryMacRoman;
547
548 case kCFStringEncodingWindowsLatin1:
549 return &__CFConverterEntryWinLatin1;
550
551 case kCFStringEncodingNextStepLatin:
552 return &__CFConverterEntryNextStepLatin;
553
554 case kCFStringEncodingUTF8:
555 return &__CFConverterEntryUTF8;
556
557 default: {
558 return NULL;
559 }
560 }
561 }
562
563 CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition) {
564 #define NUM_OF_ENTRIES_CYCLE (10)
565 static CFSpinLock_t _indexLock = CFSpinLockInit;
566 static uint32_t _currentIndex = 0;
567 static uint32_t _allocatedSize = 0;
568 static _CFEncodingConverter *_allocatedEntries = NULL;
569 _CFEncodingConverter *converter;
570
571
572 __CFSpinLock(&_indexLock);
573 if ((_currentIndex + 1) >= _allocatedSize) {
574 _currentIndex = 0;
575 _allocatedSize = 0;
576 _allocatedEntries = NULL;
577 }
578 if (_allocatedEntries == NULL) { // Not allocated yet
579 _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
580 _allocatedSize = NUM_OF_ENTRIES_CYCLE;
581 converter = &(_allocatedEntries[_currentIndex]);
582 } else {
583 converter = &(_allocatedEntries[++_currentIndex]);
584 }
585 __CFSpinUnlock(&_indexLock);
586
587 switch (definition->encodingClass) {
588 case kCFStringEncodingConverterStandard:
589 converter->toBytes = (_CFToBytesProc)definition->toBytes;
590 converter->toUnicode = (_CFToUnicodeProc)definition->toUnicode;
591 converter->toCanonicalUnicode = (_CFToUnicodeProc)definition->toUnicode;
592 converter->_toBytes = NULL;
593 converter->_toUnicode = NULL;
594 converter->maxLen = 2;
595 break;
596
597 case kCFStringEncodingConverterCheapEightBit:
598 converter->toBytes = __CFToBytesCheapEightBitWrapper;
599 converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
600 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
601 converter->_toBytes = definition->toBytes;
602 converter->_toUnicode = definition->toUnicode;
603 converter->maxLen = 1;
604 break;
605
606 case kCFStringEncodingConverterStandardEightBit:
607 converter->toBytes = __CFToBytesStandardEightBitWrapper;
608 converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
609 converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
610 converter->_toBytes = definition->toBytes;
611 converter->_toUnicode = definition->toUnicode;
612 converter->maxLen = definition->maxDecomposedCharLen;
613 break;
614
615 case kCFStringEncodingConverterCheapMultiByte:
616 converter->toBytes = __CFToBytesCheapMultiByteWrapper;
617 converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
618 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
619 converter->_toBytes = definition->toBytes;
620 converter->_toUnicode = definition->toUnicode;
621 converter->maxLen = definition->maxBytesPerChar;
622 break;
623
624 case kCFStringEncodingConverterPlatformSpecific:
625 converter->toBytes = NULL;
626 converter->toUnicode = NULL;
627 converter->toCanonicalUnicode = NULL;
628 converter->_toBytes = NULL;
629 converter->_toUnicode = NULL;
630 converter->maxLen = 0;
631 converter->toBytesLen = NULL;
632 converter->toUnicodeLen = NULL;
633 converter->toBytesFallback = NULL;
634 converter->toUnicodeFallback = NULL;
635 converter->toBytesPrecompose = NULL;
636 converter->isValidCombiningChar = NULL;
637 return converter;
638
639 default: // Shouln't be here
640 return NULL;
641 }
642
643 converter->toBytesLen = (definition->toBytesLen ? definition->toBytesLen : (CFStringEncodingToBytesLenProc)(uintptr_t)definition->maxBytesPerChar);
644 converter->toUnicodeLen = (definition->toUnicodeLen ? definition->toUnicodeLen : (CFStringEncodingToUnicodeLenProc)(uintptr_t)definition->maxDecomposedCharLen);
645 converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
646 converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
647 converter->toBytesPrecompose = (definition->toBytesPrecompose ? definition->toBytesPrecompose : NULL);
648 converter->isValidCombiningChar = (definition->isValidCombiningChar ? definition->isValidCombiningChar : NULL);
649
650 return converter;
651 }
652
653 CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(_CFConverterEntry *entry) {
654 if (!entry) return NULL;
655
656 switch (entry->encoding) {
657 case kCFStringEncodingASCII:
658 return &__CFConverterASCII;
659
660 case kCFStringEncodingISOLatin1:
661 return &__CFConverterISOLatin1;
662
663 case kCFStringEncodingMacRoman:
664 return &__CFConverterMacRoman;
665
666 case kCFStringEncodingWindowsLatin1:
667 return &__CFConverterWinLatin1;
668
669 case kCFStringEncodingNextStepLatin:
670 return &__CFConverterNextStepLatin;
671
672 case kCFStringEncodingUTF8:
673 return &__CFConverterUTF8;
674
675 default:
676 return NULL;
677 }
678 }
679
680 static const _CFEncodingConverter *__CFGetConverter(uint32_t encoding) {
681 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
682
683 if (!entry) return NULL;
684
685 if (!entry->converter) {
686 const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(entry);
687
688 if (definition) {
689 entry->converter = __CFEncodingConverterFromDefinition(definition);
690 entry->toBytesFallback = definition->toBytesFallback;
691 entry->toUnicodeFallback = definition->toUnicodeFallback;
692 }
693 }
694
695 return (_CFEncodingConverter *)entry->converter;
696 }
697
698 /* Public API
699 */
700 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
701 if (encoding == kCFStringEncodingUTF8) {
702 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
703 CFIndex convertedCharLen;
704 CFIndex usedLen;
705
706
707 if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
708 (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
709 } else {
710 if (!__CFToUTF8) {
711 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
712 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
713 }
714 convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, &usedLen);
715 }
716 if (usedCharLen) *usedCharLen = convertedCharLen;
717 if (usedByteLen) *usedByteLen = usedLen;
718
719 if (convertedCharLen == numChars) {
720 return kCFStringEncodingConversionSuccess;
721 } else if (maxByteLen && (maxByteLen == usedLen)) {
722 return kCFStringEncodingInsufficientOutputBufferLength;
723 } else {
724 return kCFStringEncodingInvalidInputStream;
725 }
726 } else {
727 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
728 CFIndex usedLen = 0;
729 CFIndex localUsedByteLen;
730 CFIndex theUsedByteLen = 0;
731 uint32_t theResult = kCFStringEncodingConversionSuccess;
732 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
733 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
734
735 if (!converter) return kCFStringEncodingConverterUnavailable;
736
737 if (flags & kCFStringEncodingSubstituteCombinings) {
738 if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->isValidCombiningChar;
739 } else {
740 isValidCombiningChar = converter->isValidCombiningChar;
741 if (!(flags & kCFStringEncodingIgnoreCombinings)) {
742 toBytesPrecompose = converter->toBytesPrecompose;
743 flags |= kCFStringEncodingComposeCombinings;
744 }
745 }
746
747
748 while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
749 if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
750 CFIndex dummy;
751
752 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
753 if (toBytesPrecompose) {
754 CFIndex localUsedLen = usedLen;
755
756 while (isValidCombiningChar(characters[--usedLen]));
757 theUsedByteLen += localUsedByteLen;
758 if (converter->maxLen > 1) {
759 TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
760 theUsedByteLen -= localUsedByteLen;
761 } else {
762 theUsedByteLen--;
763 }
764 if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
765 usedLen += localUsedLen;
766 if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
767 theUsedByteLen += localUsedByteLen;
768 theResult = kCFStringEncodingInvalidInputStream;
769 break;
770 }
771 } else if (flags & kCFStringEncodingAllowLossyConversion) {
772 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
773
774 if (lossyByte) {
775 while (isValidCombiningChar(characters[++usedLen]));
776 localUsedByteLen = 1;
777 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
778 } else {
779 ++usedLen;
780 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
781 }
782 } else {
783 theResult = kCFStringEncodingInvalidInputStream;
784 break;
785 }
786 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
787 theUsedByteLen += localUsedByteLen;
788 theResult = kCFStringEncodingInsufficientOutputBufferLength;
789 break;
790 } else if (flags & kCFStringEncodingIgnoreCombinings) {
791 while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
792 } else {
793 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
794
795 theUsedByteLen += localUsedByteLen;
796 if (lossyByte) {
797 ++usedLen;
798 localUsedByteLen = 1;
799 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
800 } else {
801 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
802 }
803 }
804 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
805 theUsedByteLen += localUsedByteLen;
806
807 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
808 CFIndex localUsedLen;
809
810 localUsedByteLen = 0;
811 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
812 }
813 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
814 break;
815 } else if (flags & kCFStringEncodingAllowLossyConversion) {
816 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
817
818 theUsedByteLen += localUsedByteLen;
819 if (lossyByte) {
820 ++usedLen;
821 localUsedByteLen = 1;
822 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
823 } else {
824 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
825 }
826 } else {
827 theUsedByteLen += localUsedByteLen;
828 theResult = kCFStringEncodingInvalidInputStream;
829 break;
830 }
831 }
832 theUsedByteLen += localUsedByteLen;
833 }
834
835 if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
836 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
837 CFIndex localUsedLen;
838
839 localUsedByteLen = 0;
840 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
841 }
842 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
843 }
844 if (usedByteLen) *usedByteLen = theUsedByteLen;
845 if (usedCharLen) *usedCharLen = usedLen;
846
847 return theResult;
848 }
849 }
850
851 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
852 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
853 CFIndex usedLen = 0;
854 CFIndex theUsedCharLen = 0;
855 CFIndex localUsedCharLen;
856 uint32_t theResult = kCFStringEncodingConversionSuccess;
857
858 if (!converter) return kCFStringEncodingConverterUnavailable;
859
860
861 while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
862 if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
863 CFIndex tempUsedCharLen;
864
865 if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || (((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) || (maxCharLen == theUsedCharLen + localUsedCharLen + 1)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
866 theUsedCharLen += localUsedCharLen;
867 theResult = kCFStringEncodingInsufficientOutputBufferLength;
868 break;
869 } else if (flags & kCFStringEncodingAllowLossyConversion) {
870 theUsedCharLen += localUsedCharLen;
871 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
872 } else {
873 theUsedCharLen += localUsedCharLen;
874 theResult = kCFStringEncodingInvalidInputStream;
875 break;
876 }
877 }
878 theUsedCharLen += localUsedCharLen;
879 }
880
881 if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
882 theResult = kCFStringEncodingInsufficientOutputBufferLength;
883 }
884 if (usedCharLen) *usedCharLen = theUsedCharLen;
885 if (usedByteLen) *usedByteLen = usedLen;
886
887 return theResult;
888 }
889
890 __private_extern__ bool CFStringEncodingIsValidEncoding(uint32_t encoding) {
891 return (CFStringEncodingGetConverter(encoding) ? true : false);
892 }
893
894 __private_extern__ const char *CFStringEncodingName(uint32_t encoding) {
895 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
896 if (entry) return entry->encodingName;
897 return NULL;
898 }
899
900 __private_extern__ const char **CFStringEncodingCanonicalCharsetNames(uint32_t encoding) {
901 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
902 if (entry) return entry->ianaNames;
903 return NULL;
904 }
905
906 __private_extern__ uint32_t CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding) {
907 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
908
909 return (entry ? entry->scriptCode : ((encoding & 0x0FFF) == kCFStringEncodingUnicode ? kCFStringEncodingUnicode : (encoding < 0xFF ? encoding : kCFStringEncodingInvalidId)));
910 }
911
912 __private_extern__ CFIndex CFStringEncodingCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
913 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
914
915 if (converter) {
916 uintptr_t switchVal = (uintptr_t)(converter->toUnicodeLen);
917
918 if (switchVal < 0xFFFF) {
919 return switchVal * numBytes;
920 } else {
921 return converter->toUnicodeLen(flags, bytes, numBytes);
922 }
923 }
924
925 return 0;
926 }
927
928 __private_extern__ CFIndex CFStringEncodingByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
929 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
930
931 if (converter) {
932 uintptr_t switchVal = (uintptr_t)(converter->toBytesLen);
933
934 if (switchVal < 0xFFFF) {
935 return switchVal * numChars;
936 } else {
937 return converter->toBytesLen(flags, characters, numChars);
938 }
939 }
940
941 return 0;
942 }
943
944 __private_extern__ void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
945 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
946
947 if (entry && __CFGetConverter(encoding)) {
948 ((_CFEncodingConverter*)entry->converter)->toBytesFallback = (toBytes ? toBytes : entry->toBytesFallback);
949 ((_CFEncodingConverter*)entry->converter)->toUnicodeFallback = (toUnicode ? toUnicode : entry->toUnicodeFallback);
950 }
951 }
952
953 __private_extern__ const CFStringEncodingConverter *CFStringEncodingGetConverter(uint32_t encoding) {
954 return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding));
955 }
956
957 static const uint32_t __CFBuiltinEncodings[] = {
958 kCFStringEncodingMacRoman,
959 kCFStringEncodingWindowsLatin1,
960 kCFStringEncodingISOLatin1,
961 kCFStringEncodingNextStepLatin,
962 kCFStringEncodingASCII,
963 kCFStringEncodingUTF8,
964 /* These seven are available only in CFString-level */
965 kCFStringEncodingNonLossyASCII,
966
967 kCFStringEncodingUTF16,
968 kCFStringEncodingUTF16BE,
969 kCFStringEncodingUTF16LE,
970
971 kCFStringEncodingUTF32,
972 kCFStringEncodingUTF32BE,
973 kCFStringEncodingUTF32LE,
974
975 kCFStringEncodingInvalidId,
976 };
977
978
979 __private_extern__ const uint32_t *CFStringEncodingListOfAvailableEncodings(void) {
980 return __CFBuiltinEncodings;
981 }
982
983
/* Remove the helper macro names so they do not remain defined past this point. */

/* Converter dispatch helpers */
#undef TO_BYTE
#undef TO_BYTE_FALLBACK
#undef TO_UNICODE
#undef TO_UNICODE_FALLBACK

/* Character constants */
#undef ASCIINewLine
#undef kSurrogateHighStart
#undef kSurrogateHighEnd
#undef kSurrogateLowStart
#undef kSurrogateLowEnd

/* Remaining names */
#undef EXTRA_BASE
#undef NUM_OF_ENTRIES_CYCLE
995