]> git.saurik.com Git - apple/cf.git/blob - StringEncodings.subproj/CFStringEncodingConverter.c
CF-368.tar.gz
[apple/cf.git] / StringEncodings.subproj / CFStringEncodingConverter.c
1 /*
2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFStringEncodingConverter.c
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFArray.h>
30 #include <CoreFoundation/CFDictionary.h>
31 #include "CFUniChar.h"
32 #include "CFUtilitiesPriv.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFStringEncodingConverterExt.h"
35 #include "CFStringEncodingConverterPriv.h"
36 #include <stdlib.h>
/* Platform threading headers: the Win32 API on Windows, pthreads elsewhere.
   Nothing is included for __MACOS8__ builds. */
#if !defined(__MACOS8__)
#ifdef __WIN32__
#include <windows.h>
#else // Mach, HP-UX, Solaris
#include <pthread.h>
#endif
#endif /* !__MACOS8__ */
44
45
/* Macros
*/
/* Conversion dispatch.
   When the private _toBytes/_toUnicode slot is set, the public slot holds one of the wrapper
   functions in this file and is called with the converter as its first argument. Otherwise the
   public slot holds the definition's own proc, which is called without the converter.
   TO_UNICODE additionally selects toCanonicalUnicode when either canonical flag is set.
   Every argument is parenthesized so that expression arguments (e.g. "a | b" for flags)
   keep their meaning after expansion; arguments may be evaluated more than once. */
#define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) ((conv)->_toBytes ? (conv)->toBytes((conv),(flags),(chars),(numChars),(bytes),(max),(used)) : ((CFStringEncodingToBytesProc)(conv)->toBytes)((flags),(chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) ((conv)->_toUnicode ? (((flags) & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) ? (conv)->toCanonicalUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used)) : (conv)->toUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used))) : ((CFStringEncodingToUnicodeProc)(conv)->toUnicode)((flags),(bytes),(numBytes),(chars),(max),(used)))

/* Code points referenced by the fallback code */
#define LineSeparator 0x2028
#define ParagraphSeparator 0x2029
#define ASCIINewLine 0x0a
/* UTF-16 surrogate ranges: high (lead) D800..DBFF, low (trail) DC00..DFFF */
#define kSurrogateHighStart 0xD800
#define kSurrogateHighEnd 0xDBFF
#define kSurrogateLowStart 0xDC00
#define kSurrogateLowEnd 0xDFFF
58
/* Mapping 128..255 to lossy ASCII
   The table has one entry per code point from U+00A0 to U+00FF (96 entries, indexed by
   character - 0xA0). Each entry holds up to three ASCII bytes followed by zero padding;
   an entry starting with 0 means the character has no replacement and is dropped.
   Capital letters map to upper-case replacements and small letters to lower-case ones.
*/
static const struct {
    unsigned char chars[4];
} _toLossyASCIITable[] = {
    {{' ', 0, 0, 0}}, // NO-BREAK SPACE
    {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
    {{'c', 0, 0, 0}}, // CENT SIGN
    {{'L', 0, 0, 0}}, // POUND SIGN
    {{'$', 0, 0, 0}}, // CURRENCY SIGN
    {{'Y', 0, 0, 0}}, // YEN SIGN
    {{'|', 0, 0, 0}}, // BROKEN BAR
    {{0, 0, 0, 0}}, // SECTION SIGN
    {{0, 0, 0, 0}}, // DIAERESIS
    {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
    {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
    {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{0, 0, 0, 0}}, // NOT SIGN
    {{'-', 0, 0, 0}}, // SOFT HYPHEN
    {{'(', 'R', ')', 0}}, // REGISTERED SIGN
    {{0, 0, 0, 0}}, // MACRON
    {{0, 0, 0, 0}}, // DEGREE SIGN
    {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
    {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
    {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
    {{0, 0, 0, 0}}, // ACUTE ACCENT
    {{0, 0, 0, 0}}, // MICRO SIGN
    {{0, 0, 0, 0}}, // PILCROW SIGN
    {{0, 0, 0, 0}}, // MIDDLE DOT
    {{0, 0, 0, 0}}, // CEDILLA
    {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
    {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
    {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
    {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
    {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
    {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
    {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
    {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
    {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
    {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
    {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
    {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
    {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
    {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
    {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
    {{'/', 0, 0, 0}}, // DIVISION SIGN
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
};
161
162 CF_INLINE UInt32 __CFToASCIILatin1Fallback(UniChar character, UInt8 *bytes, UInt32 maxByteLen) {
163 const char *losChars = (const unsigned char*)_toLossyASCIITable + (character - 0xA0) * sizeof(unsigned char[4]);
164 unsigned int numBytes = 0;
165 int idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
166
167 for (idx = 0;idx < max;idx++) {
168 if (losChars[idx]) {
169 if (maxByteLen) bytes[idx] = losChars[idx];
170 ++numBytes;
171 } else {
172 break;
173 }
174 }
175
176 return numBytes;
177 }
178
179 static UInt32 __CFDefaultToBytesFallbackProc(const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
180 if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
181 if (maxByteLen) *bytes = (UInt8)(*characters - 0x80);
182 *usedByteLen = 1;
183 return 1;
184 } else if (*characters < 0x100) {
185 *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
186 return 1;
187 } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
188 if (maxByteLen) *bytes = '?';
189 *usedByteLen = 1;
190 return (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
191 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
192 if (maxByteLen) *bytes = ' ';
193 *usedByteLen = 1;
194 return 1;
195 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
196 if (maxByteLen) *bytes = ASCIINewLine;
197 *usedByteLen = 1;
198 return 1;
199 } else if (!CFUniCharIsMemberOf(*characters, kCFUniCharLetterCharacterSet)) {
200 *usedByteLen = 0;
201 return 1;
202 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
203 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
204
205 (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
206 if (*decomposed < 0x80) {
207 if (maxByteLen) *bytes = (UInt8)(*decomposed);
208 *usedByteLen = 1;
209 return 1;
210 } else {
211 UTF16Char theChar = *decomposed;
212
213 return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
214 }
215 } else {
216 if (maxByteLen) *bytes = '?';
217 *usedByteLen = 1;
218 return 1;
219 }
220 }
221
222 static UInt32 __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
223 if (maxCharLen) *characters = (UniChar)'?';
224 *usedCharLen = 1;
225 return 1;
226 }
227
/* Invoke the converter's lossy fallback procs. Arguments are parenthesized so that
   expression arguments expand safely. */
#define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) ((conv)->toBytesFallback((chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) ((conv)->toUnicodeFallback((bytes),(numBytes),(chars),(max),(used)))

#define EXTRA_BASE (0x0F00)
232
233 /* Wrapper funcs for non-standard converters
234 */
235 static UInt32 __CFToBytesCheapEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
236 UInt32 processedCharLen = 0;
237 UInt32 length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
238 uint8_t byte;
239
240 while (processedCharLen < length) {
241 if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], &byte)) break;
242
243 if (maxByteLen) bytes[processedCharLen] = byte;
244 processedCharLen++;
245 }
246
247 *usedByteLen = processedCharLen;
248 return processedCharLen;
249 }
250
251 static UInt32 __CFToUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
252 UInt32 processedByteLen = 0;
253 UInt32 length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
254 UniChar character;
255
256 while (processedByteLen < length) {
257 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
258
259 if (maxCharLen) characters[processedByteLen] = character;
260 processedByteLen++;
261 }
262
263 *usedCharLen = processedByteLen;
264 return processedByteLen;
265 }
266
267 static UInt32 __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
268 UInt32 processedByteLen = 0;
269 UInt32 theUsedCharLen = 0;
270 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
271 UInt32 usedLen;
272 UniChar character;
273 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
274
275 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
276 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
277
278 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
279 uint32_t idx;
280
281 usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
282 *usedCharLen = theUsedCharLen;
283
284 for (idx = 0;idx < usedLen;idx++) {
285 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
286 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
287 theUsedCharLen += 2;
288 if (maxCharLen) {
289 charBuffer[idx] = charBuffer[idx] - 0x10000;
290 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
291 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
292 }
293 } else {
294 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
295 ++theUsedCharLen;
296 *(characters++) = charBuffer[idx];
297 }
298 }
299 } else {
300 if (maxCharLen) *(characters++) = character;
301 ++theUsedCharLen;
302 }
303 processedByteLen++;
304 }
305
306 *usedCharLen = theUsedCharLen;
307 return processedByteLen;
308 }
309
310 static UInt32 __CFToBytesStandardEightBitWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
311 UInt32 processedCharLen = 0;
312 uint8_t byte;
313 UInt32 usedLen;
314
315 *usedByteLen = 0;
316
317 while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
318 if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters, numChars, &byte))) break;
319
320 if (maxByteLen) bytes[*usedByteLen] = byte;
321 (*usedByteLen)++;
322 characters += usedLen;
323 numChars -= usedLen;
324 processedCharLen += usedLen;
325 }
326
327 return processedCharLen;
328 }
329
330 static UInt32 __CFToUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
331 UInt32 processedByteLen = 0;
332 #if defined(__MACOS8__) || defined(__WIN32__)
333 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
334 #else
335 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
336 #endif
337 UInt32 usedLen;
338
339 *usedCharLen = 0;
340
341 while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
342 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
343
344 if (maxCharLen) {
345 uint16_t idx;
346
347 if (*usedCharLen + usedLen > maxCharLen) break;
348
349 for (idx = 0;idx < usedLen;idx++) {
350 characters[*usedCharLen + idx] = charBuffer[idx];
351 }
352 }
353 *usedCharLen += usedLen;
354 processedByteLen++;
355 }
356
357 return processedByteLen;
358 }
359
360 static UInt32 __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
361 UInt32 processedByteLen = 0;
362 #if defined(__MACOS8__) || defined(__WIN32__)
363 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
364 #else
365 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
366 #endif
367 UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
368 UInt32 usedLen;
369 UInt32 decompedLen;
370 UInt32 idx, decompIndex;
371 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
372 UInt32 theUsedCharLen = 0;
373
374 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
375 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
376
377 for (idx = 0;idx < usedLen;idx++) {
378 if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
379 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
380 *usedCharLen = theUsedCharLen;
381
382 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
383 if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
384 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
385 theUsedCharLen += 2;
386 if (maxCharLen) {
387 charBuffer[idx] = charBuffer[idx] - 0x10000;
388 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
389 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
390 }
391 } else {
392 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
393 ++theUsedCharLen;
394 *(characters++) = charBuffer[idx];
395 }
396 }
397 } else {
398 if (maxCharLen) *(characters++) = charBuffer[idx];
399 ++theUsedCharLen;
400 }
401 }
402 processedByteLen++;
403 }
404
405 *usedCharLen = theUsedCharLen;
406 return processedByteLen;
407 }
408
409 static UInt32 __CFToBytesCheapMultiByteWrapper(const void *converter, UInt32 flags, const UniChar *characters, UInt32 numChars, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
410 UInt32 processedCharLen = 0;
411 #if defined(__MACOS8__) || defined(__WIN32__)
412 uint8_t byteBuffer[20]; // Dynamic stack allocation is GNU specific
413 #else
414 uint8_t byteBuffer[((const _CFEncodingConverter*)converter)->maxLen];
415 #endif
416 UInt32 usedLen;
417
418 *usedByteLen = 0;
419
420 while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
421 if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
422
423 if (maxByteLen) {
424 uint16_t idx;
425
426 if (*usedByteLen + usedLen > maxByteLen) break;
427
428 for (idx = 0;idx <usedLen;idx++) {
429 bytes[*usedByteLen + idx] = byteBuffer[idx];
430 }
431 }
432
433 *usedByteLen += usedLen;
434 processedCharLen++;
435 }
436
437 return processedCharLen;
438 }
439
440 static UInt32 __CFToUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
441 UInt32 processedByteLen = 0;
442 UniChar character;
443 UInt32 usedLen;
444
445 *usedCharLen = 0;
446
447 while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
448 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
449
450 if (maxCharLen) *(characters++) = character;
451 (*usedCharLen)++;
452 processedByteLen += usedLen;
453 bytes += usedLen;
454 numBytes -= usedLen;
455 }
456
457 return processedByteLen;
458 }
459
460 static UInt32 __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
461 UInt32 processedByteLen = 0;
462 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
463 UniChar character;
464 UInt32 usedLen;
465 UInt32 decomposedLen;
466 UInt32 theUsedCharLen = 0;
467 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
468
469 while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
470 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
471
472 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
473 uint32_t idx;
474
475 decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
476 *usedCharLen = theUsedCharLen;
477
478 for (idx = 0;idx < decomposedLen;idx++) {
479 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
480 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
481 theUsedCharLen += 2;
482 if (maxCharLen) {
483 charBuffer[idx] = charBuffer[idx] - 0x10000;
484 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
485 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
486 }
487 } else {
488 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
489 ++theUsedCharLen;
490 *(characters++) = charBuffer[idx];
491 }
492 }
493 } else {
494 if (maxCharLen) *(characters++) = character;
495 ++theUsedCharLen;
496 }
497
498 processedByteLen += usedLen;
499 bytes += usedLen;
500 numBytes -= usedLen;
501 }
502 *usedCharLen = theUsedCharLen;
503 return processedByteLen;
504 }
505
506 /* static functions
507 */
508 static _CFConverterEntry __CFConverterEntryASCII = {
509 kCFStringEncodingASCII, NULL,
510 "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL}, NULL, NULL, NULL, NULL,
511 kCFStringEncodingMacRoman // We use string encoding's script range here
512 };
513
514 static _CFConverterEntry __CFConverterEntryISOLatin1 = {
515 kCFStringEncodingISOLatin1, NULL,
516 "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL}, NULL, NULL, NULL, NULL,
517 kCFStringEncodingMacRoman // We use string encoding's script range here
518 };
519
520 static _CFConverterEntry __CFConverterEntryMacRoman = {
521 kCFStringEncodingMacRoman, NULL,
522 "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL}, NULL, NULL, NULL, NULL,
523 kCFStringEncodingMacRoman // We use string encoding's script range here
524 };
525
526 static _CFConverterEntry __CFConverterEntryWinLatin1 = {
527 kCFStringEncodingWindowsLatin1, NULL,
528 "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL}, NULL, NULL, NULL, NULL,
529 kCFStringEncodingMacRoman // We use string encoding's script range here
530 };
531
532 static _CFConverterEntry __CFConverterEntryNextStepLatin = {
533 kCFStringEncodingNextStepLatin, NULL,
534 "Western (NextStep)", {"x-nextstep", NULL, NULL, NULL}, NULL, NULL, NULL, NULL,
535 kCFStringEncodingMacRoman // We use string encoding's script range here
536 };
537
538 static _CFConverterEntry __CFConverterEntryUTF8 = {
539 kCFStringEncodingUTF8, NULL,
540 "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL, NULL}, NULL, NULL, NULL, NULL,
541 kCFStringEncodingUnicode // We use string encoding's script range here
542 };
543
544 CF_INLINE _CFConverterEntry *__CFStringEncodingConverterGetEntry(UInt32 encoding) {
545 switch (encoding) {
546 case kCFStringEncodingInvalidId:
547 case kCFStringEncodingASCII:
548 return &__CFConverterEntryASCII;
549
550 case kCFStringEncodingISOLatin1:
551 return &__CFConverterEntryISOLatin1;
552
553 case kCFStringEncodingMacRoman:
554 return &__CFConverterEntryMacRoman;
555
556 case kCFStringEncodingWindowsLatin1:
557 return &__CFConverterEntryWinLatin1;
558
559 case kCFStringEncodingNextStepLatin:
560 return &__CFConverterEntryNextStepLatin;
561
562 case kCFStringEncodingUTF8:
563 return &__CFConverterEntryUTF8;
564
565 default: {
566 return NULL;
567 }
568 }
569 }
570
571 CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition) {
572 #define NUM_OF_ENTRIES_CYCLE (10)
573 static CFSpinLock_t _indexLock = 0;
574 static UInt32 _currentIndex = 0;
575 static UInt32 _allocatedSize = 0;
576 static _CFEncodingConverter *_allocatedEntries = NULL;
577 _CFEncodingConverter *converter;
578
579
580 __CFSpinLock(&_indexLock);
581 if ((_currentIndex + 1) >= _allocatedSize) {
582 _currentIndex = 0;
583 _allocatedSize = 0;
584 _allocatedEntries = NULL;
585 }
586 if (_allocatedEntries == NULL) { // Not allocated yet
587 _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(NULL, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
588 _allocatedSize = NUM_OF_ENTRIES_CYCLE;
589 converter = &(_allocatedEntries[_currentIndex]);
590 } else {
591 converter = &(_allocatedEntries[++_currentIndex]);
592 }
593 __CFSpinUnlock(&_indexLock);
594
595 switch (definition->encodingClass) {
596 case kCFStringEncodingConverterStandard:
597 converter->toBytes = definition->toBytes;
598 converter->toUnicode = definition->toUnicode;
599 converter->toCanonicalUnicode = definition->toUnicode;
600 converter->_toBytes = NULL;
601 converter->_toUnicode = NULL;
602 converter->maxLen = 2;
603 break;
604
605 case kCFStringEncodingConverterCheapEightBit:
606 converter->toBytes = __CFToBytesCheapEightBitWrapper;
607 converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
608 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
609 converter->_toBytes = definition->toBytes;
610 converter->_toUnicode = definition->toUnicode;
611 converter->maxLen = 1;
612 break;
613
614 case kCFStringEncodingConverterStandardEightBit:
615 converter->toBytes = __CFToBytesStandardEightBitWrapper;
616 converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
617 converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
618 converter->_toBytes = definition->toBytes;
619 converter->_toUnicode = definition->toUnicode;
620 converter->maxLen = definition->maxDecomposedCharLen;
621 break;
622
623 case kCFStringEncodingConverterCheapMultiByte:
624 converter->toBytes = __CFToBytesCheapMultiByteWrapper;
625 converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
626 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
627 converter->_toBytes = definition->toBytes;
628 converter->_toUnicode = definition->toUnicode;
629 converter->maxLen = definition->maxBytesPerChar;
630 break;
631
632 case kCFStringEncodingConverterPlatformSpecific:
633 converter->toBytes = NULL;
634 converter->toUnicode = NULL;
635 converter->toCanonicalUnicode = NULL;
636 converter->_toBytes = NULL;
637 converter->_toUnicode = NULL;
638 converter->maxLen = 0;
639 converter->toBytesLen = NULL;
640 converter->toUnicodeLen = NULL;
641 converter->toBytesFallback = NULL;
642 converter->toUnicodeFallback = NULL;
643 converter->toBytesPrecompose = NULL;
644 converter->isValidCombiningChar = NULL;
645 return converter;
646
647 default: // Shouln't be here
648 return NULL;
649 }
650
651 converter->toBytesLen = (definition->toBytesLen ? definition->toBytesLen : (CFStringEncodingToBytesLenProc)(UInt32)definition->maxBytesPerChar);
652 converter->toUnicodeLen = (definition->toUnicodeLen ? definition->toUnicodeLen : (CFStringEncodingToUnicodeLenProc)(UInt32)definition->maxDecomposedCharLen);
653 converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
654 converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
655 converter->toBytesPrecompose = (definition->toBytesPrecompose ? definition->toBytesPrecompose : NULL);
656 converter->isValidCombiningChar = (definition->isValidCombiningChar ? definition->isValidCombiningChar : NULL);
657
658 return converter;
659 }
660
661 CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(_CFConverterEntry *entry) {
662 if (!entry) return NULL;
663
664 switch (entry->encoding) {
665 case kCFStringEncodingASCII:
666 return &__CFConverterASCII;
667
668 case kCFStringEncodingISOLatin1:
669 return &__CFConverterISOLatin1;
670
671 case kCFStringEncodingMacRoman:
672 return &__CFConverterMacRoman;
673
674 case kCFStringEncodingWindowsLatin1:
675 return &__CFConverterWinLatin1;
676
677 case kCFStringEncodingNextStepLatin:
678 return &__CFConverterNextStepLatin;
679
680 case kCFStringEncodingUTF8:
681 return &__CFConverterUTF8;
682
683 default:
684 return NULL;
685 }
686 }
687
688 static const _CFEncodingConverter *__CFGetConverter(UInt32 encoding) {
689 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
690
691 if (!entry) return NULL;
692
693 if (!entry->converter) {
694 const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(entry);
695
696 if (definition) {
697 entry->converter = __CFEncodingConverterFromDefinition(definition);
698 entry->toBytesFallback = definition->toBytesFallback;
699 entry->toUnicodeFallback = definition->toUnicodeFallback;
700 }
701 }
702
703 return (_CFEncodingConverter *)entry->converter;
704 }
705
706 /* Public API
707 */
708 UInt32 CFStringEncodingUnicodeToBytes(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars, UInt32 *usedCharLen, uint8_t *bytes, UInt32 maxByteLen, UInt32 *usedByteLen) {
709 if (encoding == kCFStringEncodingUTF8) {
710 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
711 uint32_t convertedCharLen;
712 uint32_t usedLen;
713
714
715 if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
716 (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
717 } else {
718 if (!__CFToUTF8) {
719 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
720 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
721 }
722 convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, (UInt32 *)&usedLen);
723 }
724 if (usedCharLen) *usedCharLen = convertedCharLen;
725 if (usedByteLen) *usedByteLen = usedLen;
726
727 if (convertedCharLen == numChars) {
728 return kCFStringEncodingConversionSuccess;
729 } else if (maxByteLen && (maxByteLen == usedLen)) {
730 return kCFStringEncodingInsufficientOutputBufferLength;
731 } else {
732 return kCFStringEncodingInvalidInputStream;
733 }
734 } else {
735 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
736 UInt32 usedLen = 0;
737 UInt32 localUsedByteLen;
738 UInt32 theUsedByteLen = 0;
739 UInt32 theResult = kCFStringEncodingConversionSuccess;
740 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
741 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
742
743 if (!converter) return kCFStringEncodingConverterUnavailable;
744
745 if (flags & kCFStringEncodingSubstituteCombinings) {
746 if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->isValidCombiningChar;
747 } else {
748 isValidCombiningChar = converter->isValidCombiningChar;
749 if (!(flags & kCFStringEncodingIgnoreCombinings)) {
750 toBytesPrecompose = converter->toBytesPrecompose;
751 flags |= kCFStringEncodingComposeCombinings;
752 }
753 }
754
755
756 while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
757 if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
758 UInt32 dummy;
759
760 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
761 if (toBytesPrecompose) {
762 UInt32 localUsedLen = usedLen;
763
764 while (isValidCombiningChar(characters[--usedLen]));
765 theUsedByteLen += localUsedByteLen;
766 if (converter->maxLen > 1) {
767 TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
768 theUsedByteLen -= localUsedByteLen;
769 } else {
770 theUsedByteLen--;
771 }
772 if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
773 usedLen += localUsedLen;
774 if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
775 theUsedByteLen += localUsedByteLen;
776 theResult = kCFStringEncodingInvalidInputStream;
777 break;
778 }
779 } else if (flags & kCFStringEncodingAllowLossyConversion) {
780 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
781
782 if (lossyByte) {
783 while (isValidCombiningChar(characters[++usedLen]));
784 localUsedByteLen = 1;
785 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
786 } else {
787 ++usedLen;
788 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
789 }
790 } else {
791 theResult = kCFStringEncodingInvalidInputStream;
792 break;
793 }
794 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
795 theUsedByteLen += localUsedByteLen;
796 theResult = kCFStringEncodingInsufficientOutputBufferLength;
797 break;
798 } else if (flags & kCFStringEncodingIgnoreCombinings) {
799 while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
800 } else {
801 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
802
803 theUsedByteLen += localUsedByteLen;
804 if (lossyByte) {
805 ++usedLen;
806 localUsedByteLen = 1;
807 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
808 } else {
809 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
810 }
811 }
812 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
813 theUsedByteLen += localUsedByteLen;
814
815 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
816 UInt32 localUsedLen;
817
818 localUsedByteLen = 0;
819 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
820 }
821 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
822 break;
823 } else if (flags & kCFStringEncodingAllowLossyConversion) {
824 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
825
826 theUsedByteLen += localUsedByteLen;
827 if (lossyByte) {
828 ++usedLen;
829 localUsedByteLen = 1;
830 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
831 } else {
832 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
833 }
834 } else {
835 theUsedByteLen += localUsedByteLen;
836 theResult = kCFStringEncodingInvalidInputStream;
837 break;
838 }
839 }
840 theUsedByteLen += localUsedByteLen;
841 }
842
843 if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
844 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
845 UInt32 localUsedLen;
846
847 localUsedByteLen = 0;
848 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
849 }
850 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
851 }
852 if (usedByteLen) *usedByteLen = theUsedByteLen;
853 if (usedCharLen) *usedCharLen = usedLen;
854
855 return theResult;
856 }
857 }
858
859 UInt32 CFStringEncodingBytesToUnicode(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes, UInt32 *usedByteLen, UniChar *characters, UInt32 maxCharLen, UInt32 *usedCharLen) {
860 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
861 UInt32 usedLen = 0;
862 UInt32 theUsedCharLen = 0;
863 UInt32 localUsedCharLen;
864 UInt32 theResult = kCFStringEncodingConversionSuccess;
865
866 if (!converter) return kCFStringEncodingConverterUnavailable;
867
868
869 while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
870 if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
871 UInt32 tempUsedCharLen;
872
873 if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || ((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
874 theUsedCharLen += localUsedCharLen;
875 theResult = kCFStringEncodingInsufficientOutputBufferLength;
876 break;
877 } else if (flags & kCFStringEncodingAllowLossyConversion) {
878 theUsedCharLen += localUsedCharLen;
879 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
880 } else {
881 theUsedCharLen += localUsedCharLen;
882 theResult = kCFStringEncodingInvalidInputStream;
883 break;
884 }
885 }
886 theUsedCharLen += localUsedCharLen;
887 }
888
889 if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
890 theResult = kCFStringEncodingInsufficientOutputBufferLength;
891 }
892 if (usedCharLen) *usedCharLen = theUsedCharLen;
893 if (usedByteLen) *usedByteLen = usedLen;
894
895 return theResult;
896 }
897
898 __private_extern__ Boolean CFStringEncodingIsValidEncoding(UInt32 encoding) {
899 return (CFStringEncodingGetConverter(encoding) ? true : false);
900 }
901
902 __private_extern__ const char *CFStringEncodingName(UInt32 encoding) {
903 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
904 if (entry) return entry->encodingName;
905 return NULL;
906 }
907
908 __private_extern__ const char **CFStringEncodingCanonicalCharsetNames(UInt32 encoding) {
909 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
910 if (entry) return entry->ianaNames;
911 return NULL;
912 }
913
914 __private_extern__ UInt32 CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding) {
915 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
916
917 return (entry ? entry->scriptCode : ((encoding & 0x0FFF) == kCFStringEncodingUnicode ? kCFStringEncodingUnicode : (encoding < 0xFF ? encoding : kCFStringEncodingInvalidId)));
918 }
919
920 __private_extern__ UInt32 CFStringEncodingCharLengthForBytes(UInt32 encoding, UInt32 flags, const uint8_t *bytes, UInt32 numBytes) {
921 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
922
923 if (converter) {
924 UInt32 switchVal = (UInt32)(converter->toUnicodeLen);
925
926 if (switchVal < 0xFFFF)
927 return switchVal * numBytes;
928 else
929 return converter->toUnicodeLen(flags, bytes, numBytes);
930 }
931
932 return 0;
933 }
934
935 __private_extern__ UInt32 CFStringEncodingByteLengthForCharacters(UInt32 encoding, UInt32 flags, const UniChar *characters, UInt32 numChars) {
936 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
937
938 if (converter) {
939 UInt32 switchVal = (UInt32)(converter->toBytesLen);
940
941 if (switchVal < 0xFFFF)
942 return switchVal * numChars;
943 else
944 return converter->toBytesLen(flags, characters, numChars);
945 }
946
947 return 0;
948 }
949
950 __private_extern__ void CFStringEncodingRegisterFallbackProcedures(UInt32 encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
951 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
952
953 if (entry && __CFGetConverter(encoding)) {
954 ((_CFEncodingConverter*)entry->converter)->toBytesFallback = (toBytes ? toBytes : entry->toBytesFallback);
955 ((_CFEncodingConverter*)entry->converter)->toUnicodeFallback = (toUnicode ? toUnicode : entry->toUnicodeFallback);
956 }
957 }
958
959 __private_extern__ const CFStringEncodingConverter *CFStringEncodingGetConverter(UInt32 encoding) {
960 return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding));
961 }
962
963 static const UInt32 __CFBuiltinEncodings[] = {
964 kCFStringEncodingMacRoman,
965 kCFStringEncodingWindowsLatin1,
966 kCFStringEncodingISOLatin1,
967 kCFStringEncodingNextStepLatin,
968 kCFStringEncodingASCII,
969 kCFStringEncodingUTF8,
970 /* These seven are available only in CFString-level */
971 kCFStringEncodingNonLossyASCII,
972
973 kCFStringEncodingUTF16,
974 kCFStringEncodingUTF16BE,
975 kCFStringEncodingUTF16LE,
976
977 kCFStringEncodingUTF32,
978 kCFStringEncodingUTF32BE,
979 kCFStringEncodingUTF32LE,
980
981 kCFStringEncodingInvalidId,
982 };
983
984
985 __private_extern__ const UInt32 *CFStringEncodingListOfAvailableEncodings(void) {
986 return __CFBuiltinEncodings;
987 }
988